summaryrefslogtreecommitdiffstats
path: root/media/ffvpx/libavcodec
diff options
context:
space:
mode:
Diffstat (limited to 'media/ffvpx/libavcodec')
-rw-r--r--media/ffvpx/libavcodec/allcodecs.c687
-rw-r--r--media/ffvpx/libavcodec/audioconvert.c120
-rw-r--r--media/ffvpx/libavcodec/audioconvert.h86
-rw-r--r--media/ffvpx/libavcodec/avcodec.h6100
-rw-r--r--media/ffvpx/libavcodec/avcodec.symbols129
-rw-r--r--media/ffvpx/libavcodec/avdct.h84
-rw-r--r--media/ffvpx/libavcodec/avpacket.c664
-rw-r--r--media/ffvpx/libavcodec/avpicture.c82
-rw-r--r--media/ffvpx/libavcodec/bit_depth_template.c93
-rw-r--r--media/ffvpx/libavcodec/bitstream.c363
-rw-r--r--media/ffvpx/libavcodec/blockdsp.h49
-rw-r--r--media/ffvpx/libavcodec/bsf.h33
-rw-r--r--media/ffvpx/libavcodec/bytestream.h376
-rw-r--r--media/ffvpx/libavcodec/codec_desc.c2989
-rw-r--r--media/ffvpx/libavcodec/dct.h68
-rw-r--r--media/ffvpx/libavcodec/dummy_funcs.c801
-rw-r--r--media/ffvpx/libavcodec/error_resilience.h96
-rw-r--r--media/ffvpx/libavcodec/fdctdsp.h37
-rw-r--r--media/ffvpx/libavcodec/ff_options_table.h30
-rw-r--r--media/ffvpx/libavcodec/flac.c237
-rw-r--r--media/ffvpx/libavcodec/flac.h153
-rw-r--r--media/ffvpx/libavcodec/flac_parser.c755
-rw-r--r--media/ffvpx/libavcodec/flacdata.c33
-rw-r--r--media/ffvpx/libavcodec/flacdata.h31
-rw-r--r--media/ffvpx/libavcodec/flacdec.c677
-rw-r--r--media/ffvpx/libavcodec/flacdsp.c130
-rw-r--r--media/ffvpx/libavcodec/flacdsp.h42
-rw-r--r--media/ffvpx/libavcodec/flacdsp_lpc_template.c159
-rw-r--r--media/ffvpx/libavcodec/flacdsp_template.c103
-rw-r--r--media/ffvpx/libavcodec/frame_thread_encoder.h30
-rw-r--r--media/ffvpx/libavcodec/get_bits.h587
-rw-r--r--media/ffvpx/libavcodec/golomb.c173
-rw-r--r--media/ffvpx/libavcodec/golomb.h579
-rw-r--r--media/ffvpx/libavcodec/h263dsp.h35
-rw-r--r--media/ffvpx/libavcodec/h264chroma.h39
-rw-r--r--media/ffvpx/libavcodec/h264dsp.h132
-rw-r--r--media/ffvpx/libavcodec/h264pred.c603
-rw-r--r--media/ffvpx/libavcodec/h264pred.h126
-rw-r--r--media/ffvpx/libavcodec/h264pred_template.c1355
-rw-r--r--media/ffvpx/libavcodec/hpeldsp.h104
-rw-r--r--media/ffvpx/libavcodec/idctdsp.h114
-rw-r--r--media/ffvpx/libavcodec/imgconvert.c235
-rw-r--r--media/ffvpx/libavcodec/internal.h363
-rw-r--r--media/ffvpx/libavcodec/log2_tab.c1
-rw-r--r--media/ffvpx/libavcodec/mathops.h252
-rw-r--r--media/ffvpx/libavcodec/mathtables.c114
-rw-r--r--media/ffvpx/libavcodec/me_cmp.h96
-rw-r--r--media/ffvpx/libavcodec/motion_est.h135
-rw-r--r--media/ffvpx/libavcodec/moz.build65
-rw-r--r--media/ffvpx/libavcodec/mpeg12data.h57
-rw-r--r--media/ffvpx/libavcodec/mpegpicture.h114
-rw-r--r--media/ffvpx/libavcodec/mpegutils.h142
-rw-r--r--media/ffvpx/libavcodec/mpegvideo.h752
-rw-r--r--media/ffvpx/libavcodec/mpegvideodsp.h47
-rw-r--r--media/ffvpx/libavcodec/mpegvideoencdsp.h58
-rw-r--r--media/ffvpx/libavcodec/options.c503
-rw-r--r--media/ffvpx/libavcodec/parser.c340
-rw-r--r--media/ffvpx/libavcodec/parser.h60
-rw-r--r--media/ffvpx/libavcodec/pixblockdsp.h48
-rw-r--r--media/ffvpx/libavcodec/profiles.c131
-rw-r--r--media/ffvpx/libavcodec/profiles.h34
-rw-r--r--media/ffvpx/libavcodec/pthread.c88
-rw-r--r--media/ffvpx/libavcodec/pthread_frame.c900
-rw-r--r--media/ffvpx/libavcodec/pthread_internal.h34
-rw-r--r--media/ffvpx/libavcodec/pthread_slice.c314
-rw-r--r--media/ffvpx/libavcodec/put_bits.h268
-rw-r--r--media/ffvpx/libavcodec/qpeldsp.h83
-rw-r--r--media/ffvpx/libavcodec/qsv_api.c42
-rw-r--r--media/ffvpx/libavcodec/ratecontrol.h103
-rw-r--r--media/ffvpx/libavcodec/raw.c308
-rw-r--r--media/ffvpx/libavcodec/raw.h47
-rw-r--r--media/ffvpx/libavcodec/rectangle.h124
-rw-r--r--media/ffvpx/libavcodec/resample.c439
-rw-r--r--media/ffvpx/libavcodec/resample2.c319
-rw-r--r--media/ffvpx/libavcodec/reverse.c1
-rw-r--r--media/ffvpx/libavcodec/rl.h87
-rw-r--r--media/ffvpx/libavcodec/rnd_avg.h51
-rw-r--r--media/ffvpx/libavcodec/thread.h143
-rw-r--r--media/ffvpx/libavcodec/unary.h56
-rw-r--r--media/ffvpx/libavcodec/utils.c4230
-rw-r--r--media/ffvpx/libavcodec/version.h230
-rw-r--r--media/ffvpx/libavcodec/videodsp.c55
-rw-r--r--media/ffvpx/libavcodec/videodsp.h87
-rw-r--r--media/ffvpx/libavcodec/videodsp_template.c100
-rw-r--r--media/ffvpx/libavcodec/vlc.h65
-rw-r--r--media/ffvpx/libavcodec/vorbis_parser.c341
-rw-r--r--media/ffvpx/libavcodec/vorbis_parser.h77
-rw-r--r--media/ffvpx/libavcodec/vorbis_parser_internal.h46
-rw-r--r--media/ffvpx/libavcodec/vp3dsp.h53
-rw-r--r--media/ffvpx/libavcodec/vp56.h400
-rw-r--r--media/ffvpx/libavcodec/vp56dsp.h42
-rw-r--r--media/ffvpx/libavcodec/vp56rac.c47
-rw-r--r--media/ffvpx/libavcodec/vp8.c2858
-rw-r--r--media/ffvpx/libavcodec/vp8.h311
-rw-r--r--media/ffvpx/libavcodec/vp8_parser.c39
-rw-r--r--media/ffvpx/libavcodec/vp8data.h830
-rw-r--r--media/ffvpx/libavcodec/vp8dsp.c741
-rw-r--r--media/ffvpx/libavcodec/vp8dsp.h106
-rw-r--r--media/ffvpx/libavcodec/vp9.c4365
-rw-r--r--media/ffvpx/libavcodec/vp9.h211
-rw-r--r--media/ffvpx/libavcodec/vp9_mc_template.c435
-rw-r--r--media/ffvpx/libavcodec/vp9_parser.c173
-rw-r--r--media/ffvpx/libavcodec/vp9data.h2277
-rw-r--r--media/ffvpx/libavcodec/vp9dsp.c41
-rw-r--r--media/ffvpx/libavcodec/vp9dsp.h132
-rw-r--r--media/ffvpx/libavcodec/vp9dsp_10bpp.c26
-rw-r--r--media/ffvpx/libavcodec/vp9dsp_12bpp.c26
-rw-r--r--media/ffvpx/libavcodec/vp9dsp_8bpp.c26
-rw-r--r--media/ffvpx/libavcodec/vp9dsp_template.c2601
-rw-r--r--media/ffvpx/libavcodec/x86/constants.c93
-rw-r--r--media/ffvpx/libavcodec/x86/constants.h71
-rw-r--r--media/ffvpx/libavcodec/x86/flacdsp.asm313
-rw-r--r--media/ffvpx/libavcodec/x86/flacdsp_init.c115
-rw-r--r--media/ffvpx/libavcodec/x86/h264_i386.h212
-rw-r--r--media/ffvpx/libavcodec/x86/h264_intrapred.asm2717
-rw-r--r--media/ffvpx/libavcodec/x86/h264_intrapred_10bit.asm1192
-rw-r--r--media/ffvpx/libavcodec/x86/h264_intrapred_init.c403
-rw-r--r--media/ffvpx/libavcodec/x86/mathops.h133
-rw-r--r--media/ffvpx/libavcodec/x86/moz.build35
-rw-r--r--media/ffvpx/libavcodec/x86/videodsp.asm468
-rw-r--r--media/ffvpx/libavcodec/x86/videodsp_init.c309
-rw-r--r--media/ffvpx/libavcodec/x86/vp56_arith.h51
-rw-r--r--media/ffvpx/libavcodec/x86/vp8dsp.asm1225
-rw-r--r--media/ffvpx/libavcodec/x86/vp8dsp_init.c464
-rw-r--r--media/ffvpx/libavcodec/x86/vp8dsp_loopfilter.asm1584
-rw-r--r--media/ffvpx/libavcodec/x86/vp9dsp_init.c400
-rw-r--r--media/ffvpx/libavcodec/x86/vp9dsp_init.h189
-rw-r--r--media/ffvpx/libavcodec/x86/vp9dsp_init_10bpp.c25
-rw-r--r--media/ffvpx/libavcodec/x86/vp9dsp_init_12bpp.c25
-rw-r--r--media/ffvpx/libavcodec/x86/vp9dsp_init_16bpp.c139
-rw-r--r--media/ffvpx/libavcodec/x86/vp9dsp_init_16bpp_template.c240
-rw-r--r--media/ffvpx/libavcodec/x86/vp9intrapred.asm2044
-rw-r--r--media/ffvpx/libavcodec/x86/vp9intrapred_16bpp.asm2135
-rw-r--r--media/ffvpx/libavcodec/x86/vp9itxfm.asm2625
-rw-r--r--media/ffvpx/libavcodec/x86/vp9itxfm_16bpp.asm2044
-rw-r--r--media/ffvpx/libavcodec/x86/vp9itxfm_template.asm142
-rw-r--r--media/ffvpx/libavcodec/x86/vp9lpf.asm1139
-rw-r--r--media/ffvpx/libavcodec/x86/vp9lpf_16bpp.asm823
-rw-r--r--media/ffvpx/libavcodec/x86/vp9mc.asm676
-rw-r--r--media/ffvpx/libavcodec/x86/vp9mc_16bpp.asm431
-rw-r--r--media/ffvpx/libavcodec/xiph.c63
-rw-r--r--media/ffvpx/libavcodec/xiph.h43
142 files changed, 69577 insertions, 0 deletions
diff --git a/media/ffvpx/libavcodec/allcodecs.c b/media/ffvpx/libavcodec/allcodecs.c
new file mode 100644
index 000000000..54efaad34
--- /dev/null
+++ b/media/ffvpx/libavcodec/allcodecs.c
@@ -0,0 +1,687 @@
+/*
+ * Provide registration of all codecs, parsers and bitstream filters for libavcodec.
+ * Copyright (c) 2002 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Provide registration of all codecs, parsers and bitstream filters for libavcodec.
+ */
+
+#include "config.h"
+#include "avcodec.h"
+#include "version.h"
+
+#define REGISTER_HWACCEL(X, x) \
+ { \
+ extern AVHWAccel ff_##x##_hwaccel; \
+ if (CONFIG_##X##_HWACCEL) \
+ av_register_hwaccel(&ff_##x##_hwaccel); \
+ }
+
+#define REGISTER_ENCODER(X, x) \
+ { \
+ extern AVCodec ff_##x##_encoder; \
+ if (CONFIG_##X##_ENCODER) \
+ avcodec_register(&ff_##x##_encoder); \
+ }
+
+#define REGISTER_DECODER(X, x) \
+ { \
+ extern AVCodec ff_##x##_decoder; \
+ if (CONFIG_##X##_DECODER) \
+ avcodec_register(&ff_##x##_decoder); \
+ }
+
+#define REGISTER_ENCDEC(X, x) REGISTER_ENCODER(X, x); REGISTER_DECODER(X, x)
+
+#define REGISTER_PARSER(X, x) \
+ { \
+ extern AVCodecParser ff_##x##_parser; \
+ if (CONFIG_##X##_PARSER) \
+ av_register_codec_parser(&ff_##x##_parser); \
+ }
+
+void avcodec_register_all(void)
+{
+ static int initialized;
+
+ if (initialized)
+ return;
+ initialized = 1;
+
+ /* hardware accelerators */
+ REGISTER_HWACCEL(H263_VAAPI, h263_vaapi);
+ REGISTER_HWACCEL(H263_VIDEOTOOLBOX, h263_videotoolbox);
+ REGISTER_HWACCEL(H264_CUVID, h264_cuvid);
+ REGISTER_HWACCEL(H264_D3D11VA, h264_d3d11va);
+ REGISTER_HWACCEL(H264_DXVA2, h264_dxva2);
+ REGISTER_HWACCEL(H264_MMAL, h264_mmal);
+ REGISTER_HWACCEL(H264_QSV, h264_qsv);
+ REGISTER_HWACCEL(H264_VAAPI, h264_vaapi);
+ REGISTER_HWACCEL(H264_VDA, h264_vda);
+ REGISTER_HWACCEL(H264_VDA_OLD, h264_vda_old);
+ REGISTER_HWACCEL(H264_VDPAU, h264_vdpau);
+ REGISTER_HWACCEL(H264_VIDEOTOOLBOX, h264_videotoolbox);
+ REGISTER_HWACCEL(HEVC_CUVID, hevc_cuvid);
+ REGISTER_HWACCEL(HEVC_D3D11VA, hevc_d3d11va);
+ REGISTER_HWACCEL(HEVC_DXVA2, hevc_dxva2);
+ REGISTER_HWACCEL(HEVC_QSV, hevc_qsv);
+ REGISTER_HWACCEL(HEVC_VAAPI, hevc_vaapi);
+ REGISTER_HWACCEL(HEVC_VDPAU, hevc_vdpau);
+ REGISTER_HWACCEL(MPEG1_XVMC, mpeg1_xvmc);
+ REGISTER_HWACCEL(MPEG1_VDPAU, mpeg1_vdpau);
+ REGISTER_HWACCEL(MPEG1_VIDEOTOOLBOX, mpeg1_videotoolbox);
+ REGISTER_HWACCEL(MPEG2_XVMC, mpeg2_xvmc);
+ REGISTER_HWACCEL(MPEG2_D3D11VA, mpeg2_d3d11va);
+ REGISTER_HWACCEL(MPEG2_DXVA2, mpeg2_dxva2);
+ REGISTER_HWACCEL(MPEG2_MMAL, mpeg2_mmal);
+ REGISTER_HWACCEL(MPEG2_QSV, mpeg2_qsv);
+ REGISTER_HWACCEL(MPEG2_VAAPI, mpeg2_vaapi);
+ REGISTER_HWACCEL(MPEG2_VDPAU, mpeg2_vdpau);
+ REGISTER_HWACCEL(MPEG2_VIDEOTOOLBOX, mpeg2_videotoolbox);
+ REGISTER_HWACCEL(MPEG4_MMAL, mpeg4_mmal);
+ REGISTER_HWACCEL(MPEG4_VAAPI, mpeg4_vaapi);
+ REGISTER_HWACCEL(MPEG4_VDPAU, mpeg4_vdpau);
+ REGISTER_HWACCEL(MPEG4_VIDEOTOOLBOX, mpeg4_videotoolbox);
+ REGISTER_HWACCEL(VC1_CUVID, vc1_cuvid);
+ REGISTER_HWACCEL(VC1_D3D11VA, vc1_d3d11va);
+ REGISTER_HWACCEL(VC1_DXVA2, vc1_dxva2);
+ REGISTER_HWACCEL(VC1_VAAPI, vc1_vaapi);
+ REGISTER_HWACCEL(VC1_VDPAU, vc1_vdpau);
+ REGISTER_HWACCEL(VC1_MMAL, vc1_mmal);
+ REGISTER_HWACCEL(VC1_QSV, vc1_qsv);
+ REGISTER_HWACCEL(VP8_CUVID, vp8_cuvid);
+ REGISTER_HWACCEL(VP9_CUVID, vp9_cuvid);
+ REGISTER_HWACCEL(VP9_D3D11VA, vp9_d3d11va);
+ REGISTER_HWACCEL(VP9_DXVA2, vp9_dxva2);
+ REGISTER_HWACCEL(VP9_VAAPI, vp9_vaapi);
+ REGISTER_HWACCEL(WMV3_D3D11VA, wmv3_d3d11va);
+ REGISTER_HWACCEL(WMV3_DXVA2, wmv3_dxva2);
+ REGISTER_HWACCEL(WMV3_VAAPI, wmv3_vaapi);
+ REGISTER_HWACCEL(WMV3_VDPAU, wmv3_vdpau);
+
+ /* video codecs */
+ REGISTER_ENCODER(A64MULTI, a64multi);
+ REGISTER_ENCODER(A64MULTI5, a64multi5);
+ REGISTER_DECODER(AASC, aasc);
+ REGISTER_DECODER(AIC, aic);
+ REGISTER_ENCDEC (ALIAS_PIX, alias_pix);
+ REGISTER_ENCDEC (AMV, amv);
+ REGISTER_DECODER(ANM, anm);
+ REGISTER_DECODER(ANSI, ansi);
+ REGISTER_ENCDEC (APNG, apng);
+ REGISTER_ENCDEC (ASV1, asv1);
+ REGISTER_ENCDEC (ASV2, asv2);
+ REGISTER_DECODER(AURA, aura);
+ REGISTER_DECODER(AURA2, aura2);
+ REGISTER_ENCDEC (AVRP, avrp);
+ REGISTER_DECODER(AVRN, avrn);
+ REGISTER_DECODER(AVS, avs);
+ REGISTER_ENCDEC (AVUI, avui);
+ REGISTER_ENCDEC (AYUV, ayuv);
+ REGISTER_DECODER(BETHSOFTVID, bethsoftvid);
+ REGISTER_DECODER(BFI, bfi);
+ REGISTER_DECODER(BINK, bink);
+ REGISTER_ENCDEC (BMP, bmp);
+ REGISTER_DECODER(BMV_VIDEO, bmv_video);
+ REGISTER_DECODER(BRENDER_PIX, brender_pix);
+ REGISTER_DECODER(C93, c93);
+ REGISTER_DECODER(CAVS, cavs);
+ REGISTER_DECODER(CDGRAPHICS, cdgraphics);
+ REGISTER_DECODER(CDXL, cdxl);
+ REGISTER_DECODER(CFHD, cfhd);
+ REGISTER_ENCDEC (CINEPAK, cinepak);
+ REGISTER_ENCDEC (CLJR, cljr);
+ REGISTER_DECODER(CLLC, cllc);
+ REGISTER_ENCDEC (COMFORTNOISE, comfortnoise);
+ REGISTER_DECODER(CPIA, cpia);
+ REGISTER_DECODER(CSCD, cscd);
+ REGISTER_DECODER(CYUV, cyuv);
+ REGISTER_DECODER(DDS, dds);
+ REGISTER_DECODER(DFA, dfa);
+ REGISTER_DECODER(DIRAC, dirac);
+ REGISTER_ENCDEC (DNXHD, dnxhd);
+ REGISTER_ENCDEC (DPX, dpx);
+ REGISTER_DECODER(DSICINVIDEO, dsicinvideo);
+ REGISTER_DECODER(DVAUDIO, dvaudio);
+ REGISTER_ENCDEC (DVVIDEO, dvvideo);
+ REGISTER_DECODER(DXA, dxa);
+ REGISTER_DECODER(DXTORY, dxtory);
+ REGISTER_DECODER(DXV, dxv);
+ REGISTER_DECODER(EACMV, eacmv);
+ REGISTER_DECODER(EAMAD, eamad);
+ REGISTER_DECODER(EATGQ, eatgq);
+ REGISTER_DECODER(EATGV, eatgv);
+ REGISTER_DECODER(EATQI, eatqi);
+ REGISTER_DECODER(EIGHTBPS, eightbps);
+ REGISTER_DECODER(EIGHTSVX_EXP, eightsvx_exp);
+ REGISTER_DECODER(EIGHTSVX_FIB, eightsvx_fib);
+ REGISTER_DECODER(ESCAPE124, escape124);
+ REGISTER_DECODER(ESCAPE130, escape130);
+ REGISTER_DECODER(EXR, exr);
+ REGISTER_ENCDEC (FFV1, ffv1);
+ REGISTER_ENCDEC (FFVHUFF, ffvhuff);
+ REGISTER_DECODER(FIC, fic);
+ REGISTER_ENCDEC (FLASHSV, flashsv);
+ REGISTER_ENCDEC (FLASHSV2, flashsv2);
+ REGISTER_DECODER(FLIC, flic);
+ REGISTER_ENCDEC (FLV, flv);
+ REGISTER_DECODER(FOURXM, fourxm);
+ REGISTER_DECODER(FRAPS, fraps);
+ REGISTER_DECODER(FRWU, frwu);
+ REGISTER_DECODER(G2M, g2m);
+ REGISTER_ENCDEC (GIF, gif);
+ REGISTER_ENCDEC (H261, h261);
+ REGISTER_ENCDEC (H263, h263);
+ REGISTER_DECODER(H263I, h263i);
+ REGISTER_ENCDEC (H263P, h263p);
+ REGISTER_DECODER(H264, h264);
+ REGISTER_DECODER(H264_CRYSTALHD, h264_crystalhd);
+ REGISTER_DECODER(H264_MEDIACODEC, h264_mediacodec);
+ REGISTER_DECODER(H264_MMAL, h264_mmal);
+ REGISTER_DECODER(H264_QSV, h264_qsv);
+ REGISTER_DECODER(H264_VDA, h264_vda);
+#if FF_API_VDPAU
+ REGISTER_DECODER(H264_VDPAU, h264_vdpau);
+#endif
+ REGISTER_ENCDEC (HAP, hap);
+ REGISTER_DECODER(HEVC, hevc);
+ REGISTER_DECODER(HEVC_QSV, hevc_qsv);
+ REGISTER_DECODER(HNM4_VIDEO, hnm4_video);
+ REGISTER_DECODER(HQ_HQA, hq_hqa);
+ REGISTER_DECODER(HQX, hqx);
+ REGISTER_ENCDEC (HUFFYUV, huffyuv);
+ REGISTER_DECODER(IDCIN, idcin);
+ REGISTER_DECODER(IFF_ILBM, iff_ilbm);
+ REGISTER_DECODER(INDEO2, indeo2);
+ REGISTER_DECODER(INDEO3, indeo3);
+ REGISTER_DECODER(INDEO4, indeo4);
+ REGISTER_DECODER(INDEO5, indeo5);
+ REGISTER_DECODER(INTERPLAY_VIDEO, interplay_video);
+ REGISTER_ENCDEC (JPEG2000, jpeg2000);
+ REGISTER_ENCDEC (JPEGLS, jpegls);
+ REGISTER_DECODER(JV, jv);
+ REGISTER_DECODER(KGV1, kgv1);
+ REGISTER_DECODER(KMVC, kmvc);
+ REGISTER_DECODER(LAGARITH, lagarith);
+ REGISTER_ENCODER(LJPEG, ljpeg);
+ REGISTER_DECODER(LOCO, loco);
+ REGISTER_DECODER(M101, m101);
+ REGISTER_DECODER(MAGICYUV, magicyuv);
+ REGISTER_DECODER(MDEC, mdec);
+ REGISTER_DECODER(MIMIC, mimic);
+ REGISTER_ENCDEC (MJPEG, mjpeg);
+ REGISTER_DECODER(MJPEGB, mjpegb);
+ REGISTER_DECODER(MMVIDEO, mmvideo);
+ REGISTER_DECODER(MOTIONPIXELS, motionpixels);
+#if FF_API_XVMC
+ REGISTER_DECODER(MPEG_XVMC, mpeg_xvmc);
+#endif /* FF_API_XVMC */
+ REGISTER_ENCDEC (MPEG1VIDEO, mpeg1video);
+ REGISTER_ENCDEC (MPEG2VIDEO, mpeg2video);
+ REGISTER_ENCDEC (MPEG4, mpeg4);
+ REGISTER_DECODER(MPEG4_CRYSTALHD, mpeg4_crystalhd);
+ REGISTER_DECODER(MPEG4_MMAL, mpeg4_mmal);
+#if FF_API_VDPAU
+ REGISTER_DECODER(MPEG4_VDPAU, mpeg4_vdpau);
+#endif
+ REGISTER_DECODER(MPEGVIDEO, mpegvideo);
+#if FF_API_VDPAU
+ REGISTER_DECODER(MPEG_VDPAU, mpeg_vdpau);
+ REGISTER_DECODER(MPEG1_VDPAU, mpeg1_vdpau);
+#endif
+ REGISTER_DECODER(MPEG2_MMAL, mpeg2_mmal);
+ REGISTER_DECODER(MPEG2_CRYSTALHD, mpeg2_crystalhd);
+ REGISTER_DECODER(MPEG2_QSV, mpeg2_qsv);
+ REGISTER_DECODER(MSA1, msa1);
+ REGISTER_DECODER(MSMPEG4_CRYSTALHD, msmpeg4_crystalhd);
+ REGISTER_DECODER(MSMPEG4V1, msmpeg4v1);
+ REGISTER_ENCDEC (MSMPEG4V2, msmpeg4v2);
+ REGISTER_ENCDEC (MSMPEG4V3, msmpeg4v3);
+ REGISTER_DECODER(MSRLE, msrle);
+ REGISTER_DECODER(MSS1, mss1);
+ REGISTER_DECODER(MSS2, mss2);
+ REGISTER_ENCDEC (MSVIDEO1, msvideo1);
+ REGISTER_DECODER(MSZH, mszh);
+ REGISTER_DECODER(MTS2, mts2);
+ REGISTER_DECODER(MVC1, mvc1);
+ REGISTER_DECODER(MVC2, mvc2);
+ REGISTER_DECODER(MXPEG, mxpeg);
+ REGISTER_DECODER(NUV, nuv);
+ REGISTER_DECODER(PAF_VIDEO, paf_video);
+ REGISTER_ENCDEC (PAM, pam);
+ REGISTER_ENCDEC (PBM, pbm);
+ REGISTER_ENCDEC (PCX, pcx);
+ REGISTER_ENCDEC (PGM, pgm);
+ REGISTER_ENCDEC (PGMYUV, pgmyuv);
+ REGISTER_DECODER(PICTOR, pictor);
+ REGISTER_ENCDEC (PNG, png);
+ REGISTER_ENCDEC (PPM, ppm);
+ REGISTER_ENCDEC (PRORES, prores);
+ REGISTER_ENCODER(PRORES_AW, prores_aw);
+ REGISTER_ENCODER(PRORES_KS, prores_ks);
+ REGISTER_DECODER(PRORES_LGPL, prores_lgpl);
+ REGISTER_DECODER(PTX, ptx);
+ REGISTER_DECODER(QDRAW, qdraw);
+ REGISTER_DECODER(QPEG, qpeg);
+ REGISTER_ENCDEC (QTRLE, qtrle);
+ REGISTER_ENCDEC (R10K, r10k);
+ REGISTER_ENCDEC (R210, r210);
+ REGISTER_ENCDEC (RAWVIDEO, rawvideo);
+ REGISTER_DECODER(RL2, rl2);
+ REGISTER_ENCDEC (ROQ, roq);
+ REGISTER_DECODER(RPZA, rpza);
+ REGISTER_DECODER(RSCC, rscc);
+ REGISTER_ENCDEC (RV10, rv10);
+ REGISTER_ENCDEC (RV20, rv20);
+ REGISTER_DECODER(RV30, rv30);
+ REGISTER_DECODER(RV40, rv40);
+ REGISTER_ENCDEC (S302M, s302m);
+ REGISTER_DECODER(SANM, sanm);
+ REGISTER_DECODER(SCREENPRESSO, screenpresso);
+ REGISTER_DECODER(SDX2_DPCM, sdx2_dpcm);
+ REGISTER_ENCDEC (SGI, sgi);
+ REGISTER_DECODER(SGIRLE, sgirle);
+ REGISTER_DECODER(SHEERVIDEO, sheervideo);
+ REGISTER_DECODER(SMACKER, smacker);
+ REGISTER_DECODER(SMC, smc);
+ REGISTER_DECODER(SMVJPEG, smvjpeg);
+ REGISTER_ENCDEC (SNOW, snow);
+ REGISTER_DECODER(SP5X, sp5x);
+ REGISTER_ENCDEC (SUNRAST, sunrast);
+ REGISTER_ENCDEC (SVQ1, svq1);
+ REGISTER_DECODER(SVQ3, svq3);
+ REGISTER_ENCDEC (TARGA, targa);
+ REGISTER_DECODER(TARGA_Y216, targa_y216);
+ REGISTER_DECODER(TDSC, tdsc);
+ REGISTER_DECODER(THEORA, theora);
+ REGISTER_DECODER(THP, thp);
+ REGISTER_DECODER(TIERTEXSEQVIDEO, tiertexseqvideo);
+ REGISTER_ENCDEC (TIFF, tiff);
+ REGISTER_DECODER(TMV, tmv);
+ REGISTER_DECODER(TRUEMOTION1, truemotion1);
+ REGISTER_DECODER(TRUEMOTION2, truemotion2);
+ REGISTER_DECODER(TRUEMOTION2RT, truemotion2rt);
+ REGISTER_DECODER(TSCC, tscc);
+ REGISTER_DECODER(TSCC2, tscc2);
+ REGISTER_DECODER(TXD, txd);
+ REGISTER_DECODER(ULTI, ulti);
+ REGISTER_ENCDEC (UTVIDEO, utvideo);
+ REGISTER_ENCDEC (V210, v210);
+ REGISTER_DECODER(V210X, v210x);
+ REGISTER_ENCDEC (V308, v308);
+ REGISTER_ENCDEC (V408, v408);
+ REGISTER_ENCDEC (V410, v410);
+ REGISTER_DECODER(VB, vb);
+ REGISTER_DECODER(VBLE, vble);
+ REGISTER_DECODER(VC1, vc1);
+ REGISTER_DECODER(VC1_CRYSTALHD, vc1_crystalhd);
+#if FF_API_VDPAU
+ REGISTER_DECODER(VC1_VDPAU, vc1_vdpau);
+#endif
+ REGISTER_DECODER(VC1IMAGE, vc1image);
+ REGISTER_DECODER(VC1_MMAL, vc1_mmal);
+ REGISTER_DECODER(VC1_QSV, vc1_qsv);
+ REGISTER_ENCODER(VC2, vc2);
+ REGISTER_DECODER(VCR1, vcr1);
+ REGISTER_DECODER(VMDVIDEO, vmdvideo);
+ REGISTER_DECODER(VMNC, vmnc);
+ REGISTER_DECODER(VP3, vp3);
+ REGISTER_DECODER(VP5, vp5);
+ REGISTER_DECODER(VP6, vp6);
+ REGISTER_DECODER(VP6A, vp6a);
+ REGISTER_DECODER(VP6F, vp6f);
+ REGISTER_DECODER(VP7, vp7);
+ REGISTER_DECODER(VP8, vp8);
+ REGISTER_DECODER(VP9, vp9);
+ REGISTER_DECODER(VQA, vqa);
+ REGISTER_DECODER(WEBP, webp);
+ REGISTER_ENCODER(WRAPPED_AVFRAME, wrapped_avframe);
+ REGISTER_ENCDEC (WMV1, wmv1);
+ REGISTER_ENCDEC (WMV2, wmv2);
+ REGISTER_DECODER(WMV3, wmv3);
+ REGISTER_DECODER(WMV3_CRYSTALHD, wmv3_crystalhd);
+#if FF_API_VDPAU
+ REGISTER_DECODER(WMV3_VDPAU, wmv3_vdpau);
+#endif
+ REGISTER_DECODER(WMV3IMAGE, wmv3image);
+ REGISTER_DECODER(WNV1, wnv1);
+ REGISTER_DECODER(XAN_WC3, xan_wc3);
+ REGISTER_DECODER(XAN_WC4, xan_wc4);
+ REGISTER_ENCDEC (XBM, xbm);
+ REGISTER_ENCDEC (XFACE, xface);
+ REGISTER_DECODER(XL, xl);
+ REGISTER_ENCDEC (XWD, xwd);
+ REGISTER_ENCDEC (Y41P, y41p);
+ REGISTER_DECODER(YLC, ylc);
+ REGISTER_DECODER(YOP, yop);
+ REGISTER_ENCDEC (YUV4, yuv4);
+ REGISTER_DECODER(ZERO12V, zero12v);
+ REGISTER_DECODER(ZEROCODEC, zerocodec);
+ REGISTER_ENCDEC (ZLIB, zlib);
+ REGISTER_ENCDEC (ZMBV, zmbv);
+
+ /* audio codecs */
+ REGISTER_ENCDEC (AAC, aac);
+ REGISTER_DECODER(AAC_FIXED, aac_fixed);
+ REGISTER_DECODER(AAC_LATM, aac_latm);
+ REGISTER_ENCDEC (AC3, ac3);
+ REGISTER_ENCDEC (AC3_FIXED, ac3_fixed);
+ REGISTER_ENCDEC (ALAC, alac);
+ REGISTER_DECODER(ALS, als);
+ REGISTER_DECODER(AMRNB, amrnb);
+ REGISTER_DECODER(AMRWB, amrwb);
+ REGISTER_DECODER(APE, ape);
+ REGISTER_DECODER(ATRAC1, atrac1);
+ REGISTER_DECODER(ATRAC3, atrac3);
+ REGISTER_DECODER(ATRAC3P, atrac3p);
+ REGISTER_DECODER(BINKAUDIO_DCT, binkaudio_dct);
+ REGISTER_DECODER(BINKAUDIO_RDFT, binkaudio_rdft);
+ REGISTER_DECODER(BMV_AUDIO, bmv_audio);
+ REGISTER_DECODER(COOK, cook);
+ REGISTER_ENCDEC (DCA, dca);
+ REGISTER_DECODER(DSD_LSBF, dsd_lsbf);
+ REGISTER_DECODER(DSD_MSBF, dsd_msbf);
+ REGISTER_DECODER(DSD_LSBF_PLANAR, dsd_lsbf_planar);
+ REGISTER_DECODER(DSD_MSBF_PLANAR, dsd_msbf_planar);
+ REGISTER_DECODER(DSICINAUDIO, dsicinaudio);
+ REGISTER_DECODER(DSS_SP, dss_sp);
+ REGISTER_DECODER(DST, dst);
+ REGISTER_ENCDEC (EAC3, eac3);
+ REGISTER_DECODER(EVRC, evrc);
+ REGISTER_DECODER(FFWAVESYNTH, ffwavesynth);
+ REGISTER_ENCDEC (FLAC, flac);
+ REGISTER_ENCDEC (G723_1, g723_1);
+ REGISTER_DECODER(G729, g729);
+ REGISTER_DECODER(GSM, gsm);
+ REGISTER_DECODER(GSM_MS, gsm_ms);
+ REGISTER_DECODER(IAC, iac);
+ REGISTER_DECODER(IMC, imc);
+ REGISTER_DECODER(INTERPLAY_ACM, interplay_acm);
+ REGISTER_DECODER(MACE3, mace3);
+ REGISTER_DECODER(MACE6, mace6);
+ REGISTER_DECODER(METASOUND, metasound);
+ REGISTER_DECODER(MLP, mlp);
+ REGISTER_DECODER(MP1, mp1);
+ REGISTER_DECODER(MP1FLOAT, mp1float);
+ REGISTER_ENCDEC (MP2, mp2);
+ REGISTER_DECODER(MP2FLOAT, mp2float);
+ REGISTER_ENCODER(MP2FIXED, mp2fixed);
+ REGISTER_DECODER(MP3, mp3);
+ REGISTER_DECODER(MP3FLOAT, mp3float);
+ REGISTER_DECODER(MP3ADU, mp3adu);
+ REGISTER_DECODER(MP3ADUFLOAT, mp3adufloat);
+ REGISTER_DECODER(MP3ON4, mp3on4);
+ REGISTER_DECODER(MP3ON4FLOAT, mp3on4float);
+ REGISTER_DECODER(MPC7, mpc7);
+ REGISTER_DECODER(MPC8, mpc8);
+ REGISTER_ENCDEC (NELLYMOSER, nellymoser);
+ REGISTER_DECODER(ON2AVC, on2avc);
+ REGISTER_DECODER(OPUS, opus);
+ REGISTER_DECODER(PAF_AUDIO, paf_audio);
+ REGISTER_DECODER(QCELP, qcelp);
+ REGISTER_DECODER(QDM2, qdm2);
+ REGISTER_ENCDEC (RA_144, ra_144);
+ REGISTER_DECODER(RA_288, ra_288);
+ REGISTER_DECODER(RALF, ralf);
+ REGISTER_DECODER(SHORTEN, shorten);
+ REGISTER_DECODER(SIPR, sipr);
+ REGISTER_DECODER(SMACKAUD, smackaud);
+ REGISTER_ENCDEC (SONIC, sonic);
+ REGISTER_ENCODER(SONIC_LS, sonic_ls);
+ REGISTER_DECODER(TAK, tak);
+ REGISTER_DECODER(TRUEHD, truehd);
+ REGISTER_DECODER(TRUESPEECH, truespeech);
+ REGISTER_ENCDEC (TTA, tta);
+ REGISTER_DECODER(TWINVQ, twinvq);
+ REGISTER_DECODER(VMDAUDIO, vmdaudio);
+ REGISTER_ENCDEC (VORBIS, vorbis);
+ REGISTER_ENCDEC (WAVPACK, wavpack);
+ REGISTER_DECODER(WMALOSSLESS, wmalossless);
+ REGISTER_DECODER(WMAPRO, wmapro);
+ REGISTER_ENCDEC (WMAV1, wmav1);
+ REGISTER_ENCDEC (WMAV2, wmav2);
+ REGISTER_DECODER(WMAVOICE, wmavoice);
+ REGISTER_DECODER(WS_SND1, ws_snd1);
+ REGISTER_DECODER(XMA1, xma1);
+ REGISTER_DECODER(XMA2, xma2);
+
+ /* PCM codecs */
+ REGISTER_ENCDEC (PCM_ALAW, pcm_alaw);
+ REGISTER_DECODER(PCM_BLURAY, pcm_bluray);
+ REGISTER_DECODER(PCM_DVD, pcm_dvd);
+ REGISTER_ENCDEC (PCM_F32BE, pcm_f32be);
+ REGISTER_ENCDEC (PCM_F32LE, pcm_f32le);
+ REGISTER_ENCDEC (PCM_F64BE, pcm_f64be);
+ REGISTER_ENCDEC (PCM_F64LE, pcm_f64le);
+ REGISTER_DECODER(PCM_LXF, pcm_lxf);
+ REGISTER_ENCDEC (PCM_MULAW, pcm_mulaw);
+ REGISTER_ENCDEC (PCM_S8, pcm_s8);
+ REGISTER_ENCDEC (PCM_S8_PLANAR, pcm_s8_planar);
+ REGISTER_ENCDEC (PCM_S16BE, pcm_s16be);
+ REGISTER_ENCDEC (PCM_S16BE_PLANAR, pcm_s16be_planar);
+ REGISTER_ENCDEC (PCM_S16LE, pcm_s16le);
+ REGISTER_ENCDEC (PCM_S16LE_PLANAR, pcm_s16le_planar);
+ REGISTER_ENCDEC (PCM_S24BE, pcm_s24be);
+ REGISTER_ENCDEC (PCM_S24DAUD, pcm_s24daud);
+ REGISTER_ENCDEC (PCM_S24LE, pcm_s24le);
+ REGISTER_ENCDEC (PCM_S24LE_PLANAR, pcm_s24le_planar);
+ REGISTER_ENCDEC (PCM_S32BE, pcm_s32be);
+ REGISTER_ENCDEC (PCM_S32LE, pcm_s32le);
+ REGISTER_ENCDEC (PCM_S32LE_PLANAR, pcm_s32le_planar);
+ REGISTER_ENCDEC (PCM_U8, pcm_u8);
+ REGISTER_ENCDEC (PCM_U16BE, pcm_u16be);
+ REGISTER_ENCDEC (PCM_U16LE, pcm_u16le);
+ REGISTER_ENCDEC (PCM_U24BE, pcm_u24be);
+ REGISTER_ENCDEC (PCM_U24LE, pcm_u24le);
+ REGISTER_ENCDEC (PCM_U32BE, pcm_u32be);
+ REGISTER_ENCDEC (PCM_U32LE, pcm_u32le);
+ REGISTER_DECODER(PCM_ZORK, pcm_zork);
+
+ /* DPCM codecs */
+ REGISTER_DECODER(INTERPLAY_DPCM, interplay_dpcm);
+ REGISTER_ENCDEC (ROQ_DPCM, roq_dpcm);
+ REGISTER_DECODER(SOL_DPCM, sol_dpcm);
+ REGISTER_DECODER(XAN_DPCM, xan_dpcm);
+
+ /* ADPCM codecs */
+ REGISTER_DECODER(ADPCM_4XM, adpcm_4xm);
+ REGISTER_ENCDEC (ADPCM_ADX, adpcm_adx);
+ REGISTER_DECODER(ADPCM_AFC, adpcm_afc);
+ REGISTER_DECODER(ADPCM_AICA, adpcm_aica);
+ REGISTER_DECODER(ADPCM_CT, adpcm_ct);
+ REGISTER_DECODER(ADPCM_DTK, adpcm_dtk);
+ REGISTER_DECODER(ADPCM_EA, adpcm_ea);
+ REGISTER_DECODER(ADPCM_EA_MAXIS_XA, adpcm_ea_maxis_xa);
+ REGISTER_DECODER(ADPCM_EA_R1, adpcm_ea_r1);
+ REGISTER_DECODER(ADPCM_EA_R2, adpcm_ea_r2);
+ REGISTER_DECODER(ADPCM_EA_R3, adpcm_ea_r3);
+ REGISTER_DECODER(ADPCM_EA_XAS, adpcm_ea_xas);
+ REGISTER_ENCDEC (ADPCM_G722, adpcm_g722);
+ REGISTER_ENCDEC (ADPCM_G726, adpcm_g726);
+ REGISTER_DECODER(ADPCM_G726LE, adpcm_g726le);
+ REGISTER_DECODER(ADPCM_IMA_AMV, adpcm_ima_amv);
+ REGISTER_DECODER(ADPCM_IMA_APC, adpcm_ima_apc);
+ REGISTER_DECODER(ADPCM_IMA_DAT4, adpcm_ima_dat4);
+ REGISTER_DECODER(ADPCM_IMA_DK3, adpcm_ima_dk3);
+ REGISTER_DECODER(ADPCM_IMA_DK4, adpcm_ima_dk4);
+ REGISTER_DECODER(ADPCM_IMA_EA_EACS, adpcm_ima_ea_eacs);
+ REGISTER_DECODER(ADPCM_IMA_EA_SEAD, adpcm_ima_ea_sead);
+ REGISTER_DECODER(ADPCM_IMA_ISS, adpcm_ima_iss);
+ REGISTER_DECODER(ADPCM_IMA_OKI, adpcm_ima_oki);
+ REGISTER_ENCDEC (ADPCM_IMA_QT, adpcm_ima_qt);
+ REGISTER_DECODER(ADPCM_IMA_RAD, adpcm_ima_rad);
+ REGISTER_DECODER(ADPCM_IMA_SMJPEG, adpcm_ima_smjpeg);
+ REGISTER_ENCDEC (ADPCM_IMA_WAV, adpcm_ima_wav);
+ REGISTER_DECODER(ADPCM_IMA_WS, adpcm_ima_ws);
+ REGISTER_ENCDEC (ADPCM_MS, adpcm_ms);
+ REGISTER_DECODER(ADPCM_MTAF, adpcm_mtaf);
+ REGISTER_DECODER(ADPCM_PSX, adpcm_psx);
+ REGISTER_DECODER(ADPCM_SBPRO_2, adpcm_sbpro_2);
+ REGISTER_DECODER(ADPCM_SBPRO_3, adpcm_sbpro_3);
+ REGISTER_DECODER(ADPCM_SBPRO_4, adpcm_sbpro_4);
+ REGISTER_ENCDEC (ADPCM_SWF, adpcm_swf);
+ REGISTER_DECODER(ADPCM_THP, adpcm_thp);
+ REGISTER_DECODER(ADPCM_THP_LE, adpcm_thp_le);
+ REGISTER_DECODER(ADPCM_VIMA, adpcm_vima);
+ REGISTER_DECODER(ADPCM_XA, adpcm_xa);
+ REGISTER_ENCDEC (ADPCM_YAMAHA, adpcm_yamaha);
+
+ /* subtitles */
+ REGISTER_ENCDEC (SSA, ssa);
+ REGISTER_ENCDEC (ASS, ass);
+ REGISTER_DECODER(CCAPTION, ccaption);
+ REGISTER_ENCDEC (DVBSUB, dvbsub);
+ REGISTER_ENCDEC (DVDSUB, dvdsub);
+ REGISTER_DECODER(JACOSUB, jacosub);
+ REGISTER_DECODER(MICRODVD, microdvd);
+ REGISTER_ENCDEC (MOVTEXT, movtext);
+ REGISTER_DECODER(MPL2, mpl2);
+ REGISTER_DECODER(PGSSUB, pgssub);
+ REGISTER_DECODER(PJS, pjs);
+ REGISTER_DECODER(REALTEXT, realtext);
+ REGISTER_DECODER(SAMI, sami);
+ REGISTER_ENCDEC (SRT, srt);
+ REGISTER_DECODER(STL, stl);
+ REGISTER_ENCDEC (SUBRIP, subrip);
+ REGISTER_DECODER(SUBVIEWER, subviewer);
+ REGISTER_DECODER(SUBVIEWER1, subviewer1);
+ REGISTER_ENCDEC (TEXT, text);
+ REGISTER_DECODER(VPLAYER, vplayer);
+ REGISTER_ENCDEC (WEBVTT, webvtt);
+ REGISTER_ENCDEC (XSUB, xsub);
+
+ /* external libraries */
+ REGISTER_ENCDEC (AAC_AT, aac_at);
+ REGISTER_DECODER(AC3_AT, ac3_at);
+ REGISTER_DECODER(ADPCM_IMA_QT_AT, adpcm_ima_qt_at);
+ REGISTER_ENCDEC (ALAC_AT, alac_at);
+ REGISTER_DECODER(AMR_NB_AT, amr_nb_at);
+ REGISTER_DECODER(EAC3_AT, eac3_at);
+ REGISTER_DECODER(GSM_MS_AT, gsm_ms_at);
+ REGISTER_ENCDEC (ILBC_AT, ilbc_at);
+ REGISTER_DECODER(MP1_AT, mp1_at);
+ REGISTER_DECODER(MP2_AT, mp2_at);
+ REGISTER_DECODER(MP3_AT, mp3_at);
+ REGISTER_ENCDEC (PCM_ALAW_AT, pcm_alaw_at);
+ REGISTER_ENCDEC (PCM_MULAW_AT, pcm_mulaw_at);
+ REGISTER_DECODER(QDMC_AT, qdmc_at);
+ REGISTER_DECODER(QDM2_AT, qdm2_at);
+ REGISTER_DECODER(LIBCELT, libcelt);
+ REGISTER_ENCODER(LIBFAAC, libfaac);
+ REGISTER_ENCDEC (LIBFDK_AAC, libfdk_aac);
+ REGISTER_ENCDEC (LIBGSM, libgsm);
+ REGISTER_ENCDEC (LIBGSM_MS, libgsm_ms);
+ REGISTER_ENCDEC (LIBILBC, libilbc);
+ REGISTER_ENCODER(LIBMP3LAME, libmp3lame);
+ REGISTER_ENCDEC (LIBOPENCORE_AMRNB, libopencore_amrnb);
+ REGISTER_DECODER(LIBOPENCORE_AMRWB, libopencore_amrwb);
+ REGISTER_ENCDEC (LIBOPENJPEG, libopenjpeg);
+ REGISTER_ENCDEC (LIBOPUS, libopus);
+ REGISTER_ENCDEC (LIBSCHROEDINGER, libschroedinger);
+ REGISTER_ENCODER(LIBSHINE, libshine);
+ REGISTER_ENCDEC (LIBSPEEX, libspeex);
+ REGISTER_ENCODER(LIBTHEORA, libtheora);
+ REGISTER_ENCODER(LIBTWOLAME, libtwolame);
+ REGISTER_ENCODER(LIBVO_AMRWBENC, libvo_amrwbenc);
+ REGISTER_ENCDEC (LIBVORBIS, libvorbis);
+ REGISTER_ENCDEC (LIBVPX_VP8, libvpx_vp8);
+ REGISTER_ENCDEC (LIBVPX_VP9, libvpx_vp9);
+ REGISTER_ENCODER(LIBWAVPACK, libwavpack);
+ REGISTER_ENCODER(LIBWEBP_ANIM, libwebp_anim); /* preferred over libwebp */
+ REGISTER_ENCODER(LIBWEBP, libwebp);
+ REGISTER_ENCODER(LIBX262, libx262);
+ REGISTER_ENCODER(LIBX264, libx264);
+ REGISTER_ENCODER(LIBX264RGB, libx264rgb);
+ REGISTER_ENCODER(LIBX265, libx265);
+ REGISTER_ENCODER(LIBXAVS, libxavs);
+ REGISTER_ENCODER(LIBXVID, libxvid);
+ REGISTER_DECODER(LIBZVBI_TELETEXT, libzvbi_teletext);
+
+ /* text */
+ REGISTER_DECODER(BINTEXT, bintext);
+ REGISTER_DECODER(XBIN, xbin);
+ REGISTER_DECODER(IDF, idf);
+
+ /* external libraries, that shouldn't be used by default if one of the
+ * above is available */
+ REGISTER_ENCODER(LIBOPENH264, libopenh264);
+ REGISTER_DECODER(H264_CUVID, h264_cuvid);
+ REGISTER_ENCODER(H264_NVENC, h264_nvenc);
+ REGISTER_ENCODER(H264_OMX, h264_omx);
+ REGISTER_ENCODER(H264_QSV, h264_qsv);
+ REGISTER_ENCODER(H264_VAAPI, h264_vaapi);
+ REGISTER_ENCODER(H264_VIDEOTOOLBOX, h264_videotoolbox);
+#if FF_API_NVENC_OLD_NAME
+ REGISTER_ENCODER(NVENC, nvenc);
+ REGISTER_ENCODER(NVENC_H264, nvenc_h264);
+ REGISTER_ENCODER(NVENC_HEVC, nvenc_hevc);
+#endif
+ REGISTER_DECODER(HEVC_CUVID, hevc_cuvid);
+ REGISTER_ENCODER(HEVC_NVENC, hevc_nvenc);
+ REGISTER_ENCODER(HEVC_QSV, hevc_qsv);
+ REGISTER_ENCODER(HEVC_VAAPI, hevc_vaapi);
+ REGISTER_ENCODER(LIBKVAZAAR, libkvazaar);
+ REGISTER_ENCODER(MJPEG_VAAPI, mjpeg_vaapi);
+ REGISTER_ENCODER(MPEG2_QSV, mpeg2_qsv);
+ REGISTER_DECODER(VC1_CUVID, vc1_cuvid);
+ REGISTER_DECODER(VP8_CUVID, vp8_cuvid);
+ REGISTER_DECODER(VP9_CUVID, vp9_cuvid);
+
+ /* parsers */
+ REGISTER_PARSER(AAC, aac);
+ REGISTER_PARSER(AAC_LATM, aac_latm);
+ REGISTER_PARSER(AC3, ac3);
+ REGISTER_PARSER(ADX, adx);
+ REGISTER_PARSER(BMP, bmp);
+ REGISTER_PARSER(CAVSVIDEO, cavsvideo);
+ REGISTER_PARSER(COOK, cook);
+ REGISTER_PARSER(DCA, dca);
+ REGISTER_PARSER(DIRAC, dirac);
+ REGISTER_PARSER(DNXHD, dnxhd);
+ REGISTER_PARSER(DPX, dpx);
+ REGISTER_PARSER(DVAUDIO, dvaudio);
+ REGISTER_PARSER(DVBSUB, dvbsub);
+ REGISTER_PARSER(DVDSUB, dvdsub);
+ REGISTER_PARSER(DVD_NAV, dvd_nav);
+ REGISTER_PARSER(FLAC, flac);
+ REGISTER_PARSER(G729, g729);
+ REGISTER_PARSER(GSM, gsm);
+ REGISTER_PARSER(H261, h261);
+ REGISTER_PARSER(H263, h263);
+ REGISTER_PARSER(H264, h264);
+ REGISTER_PARSER(HEVC, hevc);
+ REGISTER_PARSER(MJPEG, mjpeg);
+ REGISTER_PARSER(MLP, mlp);
+ REGISTER_PARSER(MPEG4VIDEO, mpeg4video);
+ REGISTER_PARSER(MPEGAUDIO, mpegaudio);
+ REGISTER_PARSER(MPEGVIDEO, mpegvideo);
+ REGISTER_PARSER(OPUS, opus);
+ REGISTER_PARSER(PNG, png);
+ REGISTER_PARSER(PNM, pnm);
+ REGISTER_PARSER(RV30, rv30);
+ REGISTER_PARSER(RV40, rv40);
+ REGISTER_PARSER(TAK, tak);
+ REGISTER_PARSER(VC1, vc1);
+ REGISTER_PARSER(VORBIS, vorbis);
+ REGISTER_PARSER(VP3, vp3);
+ REGISTER_PARSER(VP8, vp8);
+ REGISTER_PARSER(VP9, vp9);
+}
diff --git a/media/ffvpx/libavcodec/audioconvert.c b/media/ffvpx/libavcodec/audioconvert.c
new file mode 100644
index 000000000..5e46fae2d
--- /dev/null
+++ b/media/ffvpx/libavcodec/audioconvert.c
@@ -0,0 +1,120 @@
+/*
+ * audio conversion
+ * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * audio conversion
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include "libavutil/avstring.h"
+#include "libavutil/common.h"
+#include "libavutil/libm.h"
+#include "libavutil/samplefmt.h"
+#include "avcodec.h"
+#include "audioconvert.h"
+
+#if FF_API_AUDIO_CONVERT
+
+struct AVAudioConvert {
+ int in_channels, out_channels;
+ int fmt_pair;
+};
+
+AVAudioConvert *av_audio_convert_alloc(enum AVSampleFormat out_fmt, int out_channels,
+ enum AVSampleFormat in_fmt, int in_channels,
+ const float *matrix, int flags)
+{
+ AVAudioConvert *ctx;
+ if (in_channels!=out_channels)
+ return NULL; /* FIXME: not supported */
+ ctx = av_malloc(sizeof(AVAudioConvert));
+ if (!ctx)
+ return NULL;
+ ctx->in_channels = in_channels;
+ ctx->out_channels = out_channels;
+ ctx->fmt_pair = out_fmt + AV_SAMPLE_FMT_NB*in_fmt;
+ return ctx;
+}
+
+void av_audio_convert_free(AVAudioConvert *ctx)
+{
+ av_free(ctx);
+}
+
+int av_audio_convert(AVAudioConvert *ctx,
+ void * const out[6], const int out_stride[6],
+ const void * const in[6], const int in_stride[6], int len)
+{
+ int ch;
+
+ //FIXME optimize common cases
+
+ for(ch=0; ch<ctx->out_channels; ch++){
+ const int is= in_stride[ch];
+ const int os= out_stride[ch];
+ const uint8_t *pi= in[ch];
+ uint8_t *po= out[ch];
+ uint8_t *end= po + os*len;
+ if(!out[ch])
+ continue;
+
+#define CONV(ofmt, otype, ifmt, expr)\
+if(ctx->fmt_pair == ofmt + AV_SAMPLE_FMT_NB*ifmt){\
+ do{\
+ *(otype*)po = expr; pi += is; po += os;\
+ }while(po < end);\
+}
+
+//FIXME put things below under ifdefs so we do not waste space for cases no codec will need
+//FIXME rounding ?
+
+ CONV(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_U8 , *(const uint8_t*)pi)
+ else CONV(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80)<<8)
+ else CONV(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80)<<24)
+ else CONV(AV_SAMPLE_FMT_FLT, float , AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80)*(1.0 / (1<<7)))
+ else CONV(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80)*(1.0 / (1<<7)))
+ else CONV(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S16, (*(const int16_t*)pi>>8) + 0x80)
+ else CONV(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S16, *(const int16_t*)pi)
+ else CONV(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S16, *(const int16_t*)pi<<16)
+ else CONV(AV_SAMPLE_FMT_FLT, float , AV_SAMPLE_FMT_S16, *(const int16_t*)pi*(1.0 / (1<<15)))
+ else CONV(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S16, *(const int16_t*)pi*(1.0 / (1<<15)))
+ else CONV(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S32, (*(const int32_t*)pi>>24) + 0x80)
+ else CONV(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S32, *(const int32_t*)pi>>16)
+ else CONV(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S32, *(const int32_t*)pi)
+ else CONV(AV_SAMPLE_FMT_FLT, float , AV_SAMPLE_FMT_S32, *(const int32_t*)pi*(1.0 / (1U<<31)))
+ else CONV(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S32, *(const int32_t*)pi*(1.0 / (1U<<31)))
+ else CONV(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_FLT, av_clip_uint8( lrintf(*(const float*)pi * (1<<7)) + 0x80))
+ else CONV(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, av_clip_int16( lrintf(*(const float*)pi * (1<<15))))
+ else CONV(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, av_clipl_int32(llrintf(*(const float*)pi * (1U<<31))))
+ else CONV(AV_SAMPLE_FMT_FLT, float , AV_SAMPLE_FMT_FLT, *(const float*)pi)
+ else CONV(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_FLT, *(const float*)pi)
+ else CONV(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_DBL, av_clip_uint8( lrint(*(const double*)pi * (1<<7)) + 0x80))
+ else CONV(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, av_clip_int16( lrint(*(const double*)pi * (1<<15))))
+ else CONV(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, av_clipl_int32(llrint(*(const double*)pi * (1U<<31))))
+ else CONV(AV_SAMPLE_FMT_FLT, float , AV_SAMPLE_FMT_DBL, *(const double*)pi)
+ else CONV(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_DBL, *(const double*)pi)
+ else return -1;
+ }
+ return 0;
+}
+
+#endif /* FF_API_AUDIO_CONVERT */
diff --git a/media/ffvpx/libavcodec/audioconvert.h b/media/ffvpx/libavcodec/audioconvert.h
new file mode 100644
index 000000000..996c3f37a
--- /dev/null
+++ b/media/ffvpx/libavcodec/audioconvert.h
@@ -0,0 +1,86 @@
+/*
+ * audio conversion
+ * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2008 Peter Ross
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AUDIOCONVERT_H
+#define AVCODEC_AUDIOCONVERT_H
+
+#include "version.h"
+
+/**
+ * @file
+ * Audio format conversion routines
+ * This interface is deprecated and will be dropped in a future
+ * version. You should use the libswresample library instead.
+ */
+
+#if FF_API_AUDIO_CONVERT
+
+#include "libavutil/cpu.h"
+#include "avcodec.h"
+#include "libavutil/channel_layout.h"
+
+struct AVAudioConvert;
+typedef struct AVAudioConvert AVAudioConvert;
+
+/**
+ * Create an audio sample format converter context
+ * @param out_fmt Output sample format
+ * @param out_channels Number of output channels
+ * @param in_fmt Input sample format
+ * @param in_channels Number of input channels
+ * @param[in] matrix Channel mixing matrix (of dimension in_channel*out_channels). Set to NULL to ignore.
+ * @param flags See AV_CPU_FLAG_xx
+ * @return NULL on error
+ * @deprecated See libswresample
+ */
+
+attribute_deprecated
+AVAudioConvert *av_audio_convert_alloc(enum AVSampleFormat out_fmt, int out_channels,
+ enum AVSampleFormat in_fmt, int in_channels,
+ const float *matrix, int flags);
+
+/**
+ * Free audio sample format converter context
+ * @deprecated See libswresample
+ */
+
+attribute_deprecated
+void av_audio_convert_free(AVAudioConvert *ctx);
+
+/**
+ * Convert between audio sample formats
+ * @param[in] out array of output buffers for each channel. set to NULL to ignore processing of the given channel.
+ * @param[in] out_stride distance between consecutive output samples (measured in bytes)
+ * @param[in] in array of input buffers for each channel
+ * @param[in] in_stride distance between consecutive input samples (measured in bytes)
+ * @param len length of audio frame size (measured in samples)
+ * @deprecated See libswresample
+ */
+
+attribute_deprecated
+int av_audio_convert(AVAudioConvert *ctx,
+ void * const out[6], const int out_stride[6],
+ const void * const in[6], const int in_stride[6], int len);
+
+#endif /* FF_API_AUDIO_CONVERT */
+
+#endif /* AVCODEC_AUDIOCONVERT_H */
diff --git a/media/ffvpx/libavcodec/avcodec.h b/media/ffvpx/libavcodec/avcodec.h
new file mode 100644
index 000000000..39713ed76
--- /dev/null
+++ b/media/ffvpx/libavcodec/avcodec.h
@@ -0,0 +1,6100 @@
+/*
+ * copyright (c) 2001 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AVCODEC_H
+#define AVCODEC_AVCODEC_H
+
+/**
+ * @file
+ * @ingroup libavc
+ * Libavcodec external API header
+ */
+
+#include <errno.h>
+#include "libavutil/samplefmt.h"
+#include "libavutil/attributes.h"
+#include "libavutil/avutil.h"
+#include "libavutil/buffer.h"
+#include "libavutil/cpu.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/dict.h"
+#include "libavutil/frame.h"
+#include "libavutil/log.h"
+#include "libavutil/pixfmt.h"
+#include "libavutil/rational.h"
+
+#include "version.h"
+
+/**
+ * @defgroup libavc Encoding/Decoding Library
+ * @{
+ *
+ * @defgroup lavc_decoding Decoding
+ * @{
+ * @}
+ *
+ * @defgroup lavc_encoding Encoding
+ * @{
+ * @}
+ *
+ * @defgroup lavc_codec Codecs
+ * @{
+ * @defgroup lavc_codec_native Native Codecs
+ * @{
+ * @}
+ * @defgroup lavc_codec_wrappers External library wrappers
+ * @{
+ * @}
+ * @defgroup lavc_codec_hwaccel Hardware Accelerators bridge
+ * @{
+ * @}
+ * @}
+ * @defgroup lavc_internal Internal
+ * @{
+ * @}
+ * @}
+ */
+
+/**
+ * @ingroup libavc
+ * @defgroup lavc_encdec send/receive encoding and decoding API overview
+ * @{
+ *
+ * The avcodec_send_packet()/avcodec_receive_frame()/avcodec_send_frame()/
+ * avcodec_receive_packet() functions provide an encode/decode API, which
+ * decouples input and output.
+ *
+ * The API is very similar for encoding/decoding and audio/video, and works as
+ * follows:
+ * - Set up and open the AVCodecContext as usual.
+ * - Send valid input:
+ * - For decoding, call avcodec_send_packet() to give the decoder raw
+ * compressed data in an AVPacket.
+ * - For encoding, call avcodec_send_frame() to give the decoder an AVFrame
+ * containing uncompressed audio or video.
+ * In both cases, it is recommended that AVPackets and AVFrames are
+ * refcounted, or libavcodec might have to copy the input data. (libavformat
+ * always returns refcounted AVPackets, and av_frame_get_buffer() allocates
+ * refcounted AVFrames.)
+ * - Receive output in a loop. Periodically call one of the avcodec_receive_*()
+ * functions and process their output:
+ * - For decoding, call avcodec_receive_frame(). On success, it will return
+ * an AVFrame containing uncompressed audio or video data.
+ * - For encoding, call avcodec_receive_packet(). On success, it will return
+ * an AVPacket with a compressed frame.
+ * Repeat this call until it returns AVERROR(EAGAIN) or an error. The
+ * AVERROR(EAGAIN) return value means that new input data is required to
+ * return new output. In this case, continue with sending input. For each
+ * input frame/packet, the codec will typically return 1 output frame/packet,
+ * but it can also be 0 or more than 1.
+ *
+ * At the beginning of decoding or encoding, the codec might accept multiple
+ * input frames/packets without returning a frame, until its internal buffers
+ * are filled. This situation is handled transparently if you follow the steps
+ * outlined above.
+ *
+ * End of stream situations. These require "flushing" (aka draining) the codec,
+ * as the codec might buffer multiple frames or packets internally for
+ * performance or out of necessity (consider B-frames).
+ * This is handled as follows:
+ * - Instead of valid input, send NULL to the avcodec_send_packet() (decoding)
+ * or avcodec_send_frame() (encoding) functions. This will enter draining
+ * mode.
+ * - Call avcodec_receive_frame() (decoding) or avcodec_receive_packet()
+ * (encoding) in a loop until AVERROR_EOF is returned. The functions will
+ * not return AVERROR(EAGAIN), unless you forgot to enter draining mode.
+ * - Before decoding can be resumed again, the codec has to be reset with
+ * avcodec_flush_buffers().
+ *
+ * Using the API as outlined above is highly recommended. But it is also
+ * possible to call functions outside of this rigid schema. For example, you can
+ * call avcodec_send_packet() repeatedly without calling
+ * avcodec_receive_frame(). In this case, avcodec_send_packet() will succeed
+ * until the codec's internal buffer has been filled up (which is typically of
+ * size 1 per output frame, after initial input), and then reject input with
+ * AVERROR(EAGAIN). Once it starts rejecting input, you have no choice but to
+ * read at least some output.
+ *
+ * Not all codecs will follow a rigid and predictable dataflow; the only
+ * guarantee is that an AVERROR(EAGAIN) return value on a send/receive call on
+ * one end implies that a receive/send call on the other end will succeed. In
+ * general, no codec will permit unlimited buffering of input or output.
+ *
+ * This API replaces the following legacy functions:
+ * - avcodec_decode_video2() and avcodec_decode_audio4():
+ * Use avcodec_send_packet() to feed input to the decoder, then use
+ * avcodec_receive_frame() to receive decoded frames after each packet.
+ * Unlike with the old video decoding API, multiple frames might result from
+ * a packet. For audio, splitting the input packet into frames by partially
+ * decoding packets becomes transparent to the API user. You never need to
+ * feed an AVPacket to the API twice.
+ * Additionally, sending a flush/draining packet is required only once.
+ * - avcodec_encode_video2()/avcodec_encode_audio2():
+ * Use avcodec_send_frame() to feed input to the encoder, then use
+ * avcodec_receive_packet() to receive encoded packets.
+ * Providing user-allocated buffers for avcodec_receive_packet() is not
+ * possible.
+ * - The new API does not handle subtitles yet.
+ *
+ * Mixing new and old function calls on the same AVCodecContext is not allowed,
+ * and will result in undefined behavior.
+ *
+ * Some codecs might require using the new API; using the old API will return
+ * an error when calling it.
+ * @}
+ */
+
+/**
+ * @defgroup lavc_core Core functions/structures.
+ * @ingroup libavc
+ *
+ * Basic definitions, functions for querying libavcodec capabilities,
+ * allocating core structures, etc.
+ * @{
+ */
+
+
+/**
+ * Identify the syntax and semantics of the bitstream.
+ * The principle is roughly:
+ * Two decoders with the same ID can decode the same streams.
+ * Two encoders with the same ID can encode compatible streams.
+ * There may be slight deviations from the principle due to implementation
+ * details.
+ *
+ * If you add a codec ID to this list, add it so that
+ * 1. no value of an existing codec ID changes (that would break ABI),
+ * 2. it is as close as possible to similar codecs
+ *
+ * After adding new codec IDs, do not forget to add an entry to the codec
+ * descriptor list and bump libavcodec minor version.
+ */
+enum AVCodecID {
+ AV_CODEC_ID_NONE,
+
+ /* video codecs */
+ AV_CODEC_ID_MPEG1VIDEO,
+ AV_CODEC_ID_MPEG2VIDEO, ///< preferred ID for MPEG-1/2 video decoding
+#if FF_API_XVMC
+ AV_CODEC_ID_MPEG2VIDEO_XVMC,
+#endif /* FF_API_XVMC */
+ AV_CODEC_ID_H261,
+ AV_CODEC_ID_H263,
+ AV_CODEC_ID_RV10,
+ AV_CODEC_ID_RV20,
+ AV_CODEC_ID_MJPEG,
+ AV_CODEC_ID_MJPEGB,
+ AV_CODEC_ID_LJPEG,
+ AV_CODEC_ID_SP5X,
+ AV_CODEC_ID_JPEGLS,
+ AV_CODEC_ID_MPEG4,
+ AV_CODEC_ID_RAWVIDEO,
+ AV_CODEC_ID_MSMPEG4V1,
+ AV_CODEC_ID_MSMPEG4V2,
+ AV_CODEC_ID_MSMPEG4V3,
+ AV_CODEC_ID_WMV1,
+ AV_CODEC_ID_WMV2,
+ AV_CODEC_ID_H263P,
+ AV_CODEC_ID_H263I,
+ AV_CODEC_ID_FLV1,
+ AV_CODEC_ID_SVQ1,
+ AV_CODEC_ID_SVQ3,
+ AV_CODEC_ID_DVVIDEO,
+ AV_CODEC_ID_HUFFYUV,
+ AV_CODEC_ID_CYUV,
+ AV_CODEC_ID_H264,
+ AV_CODEC_ID_INDEO3,
+ AV_CODEC_ID_VP3,
+ AV_CODEC_ID_THEORA,
+ AV_CODEC_ID_ASV1,
+ AV_CODEC_ID_ASV2,
+ AV_CODEC_ID_FFV1,
+ AV_CODEC_ID_4XM,
+ AV_CODEC_ID_VCR1,
+ AV_CODEC_ID_CLJR,
+ AV_CODEC_ID_MDEC,
+ AV_CODEC_ID_ROQ,
+ AV_CODEC_ID_INTERPLAY_VIDEO,
+ AV_CODEC_ID_XAN_WC3,
+ AV_CODEC_ID_XAN_WC4,
+ AV_CODEC_ID_RPZA,
+ AV_CODEC_ID_CINEPAK,
+ AV_CODEC_ID_WS_VQA,
+ AV_CODEC_ID_MSRLE,
+ AV_CODEC_ID_MSVIDEO1,
+ AV_CODEC_ID_IDCIN,
+ AV_CODEC_ID_8BPS,
+ AV_CODEC_ID_SMC,
+ AV_CODEC_ID_FLIC,
+ AV_CODEC_ID_TRUEMOTION1,
+ AV_CODEC_ID_VMDVIDEO,
+ AV_CODEC_ID_MSZH,
+ AV_CODEC_ID_ZLIB,
+ AV_CODEC_ID_QTRLE,
+ AV_CODEC_ID_TSCC,
+ AV_CODEC_ID_ULTI,
+ AV_CODEC_ID_QDRAW,
+ AV_CODEC_ID_VIXL,
+ AV_CODEC_ID_QPEG,
+ AV_CODEC_ID_PNG,
+ AV_CODEC_ID_PPM,
+ AV_CODEC_ID_PBM,
+ AV_CODEC_ID_PGM,
+ AV_CODEC_ID_PGMYUV,
+ AV_CODEC_ID_PAM,
+ AV_CODEC_ID_FFVHUFF,
+ AV_CODEC_ID_RV30,
+ AV_CODEC_ID_RV40,
+ AV_CODEC_ID_VC1,
+ AV_CODEC_ID_WMV3,
+ AV_CODEC_ID_LOCO,
+ AV_CODEC_ID_WNV1,
+ AV_CODEC_ID_AASC,
+ AV_CODEC_ID_INDEO2,
+ AV_CODEC_ID_FRAPS,
+ AV_CODEC_ID_TRUEMOTION2,
+ AV_CODEC_ID_BMP,
+ AV_CODEC_ID_CSCD,
+ AV_CODEC_ID_MMVIDEO,
+ AV_CODEC_ID_ZMBV,
+ AV_CODEC_ID_AVS,
+ AV_CODEC_ID_SMACKVIDEO,
+ AV_CODEC_ID_NUV,
+ AV_CODEC_ID_KMVC,
+ AV_CODEC_ID_FLASHSV,
+ AV_CODEC_ID_CAVS,
+ AV_CODEC_ID_JPEG2000,
+ AV_CODEC_ID_VMNC,
+ AV_CODEC_ID_VP5,
+ AV_CODEC_ID_VP6,
+ AV_CODEC_ID_VP6F,
+ AV_CODEC_ID_TARGA,
+ AV_CODEC_ID_DSICINVIDEO,
+ AV_CODEC_ID_TIERTEXSEQVIDEO,
+ AV_CODEC_ID_TIFF,
+ AV_CODEC_ID_GIF,
+ AV_CODEC_ID_DXA,
+ AV_CODEC_ID_DNXHD,
+ AV_CODEC_ID_THP,
+ AV_CODEC_ID_SGI,
+ AV_CODEC_ID_C93,
+ AV_CODEC_ID_BETHSOFTVID,
+ AV_CODEC_ID_PTX,
+ AV_CODEC_ID_TXD,
+ AV_CODEC_ID_VP6A,
+ AV_CODEC_ID_AMV,
+ AV_CODEC_ID_VB,
+ AV_CODEC_ID_PCX,
+ AV_CODEC_ID_SUNRAST,
+ AV_CODEC_ID_INDEO4,
+ AV_CODEC_ID_INDEO5,
+ AV_CODEC_ID_MIMIC,
+ AV_CODEC_ID_RL2,
+ AV_CODEC_ID_ESCAPE124,
+ AV_CODEC_ID_DIRAC,
+ AV_CODEC_ID_BFI,
+ AV_CODEC_ID_CMV,
+ AV_CODEC_ID_MOTIONPIXELS,
+ AV_CODEC_ID_TGV,
+ AV_CODEC_ID_TGQ,
+ AV_CODEC_ID_TQI,
+ AV_CODEC_ID_AURA,
+ AV_CODEC_ID_AURA2,
+ AV_CODEC_ID_V210X,
+ AV_CODEC_ID_TMV,
+ AV_CODEC_ID_V210,
+ AV_CODEC_ID_DPX,
+ AV_CODEC_ID_MAD,
+ AV_CODEC_ID_FRWU,
+ AV_CODEC_ID_FLASHSV2,
+ AV_CODEC_ID_CDGRAPHICS,
+ AV_CODEC_ID_R210,
+ AV_CODEC_ID_ANM,
+ AV_CODEC_ID_BINKVIDEO,
+ AV_CODEC_ID_IFF_ILBM,
+#define AV_CODEC_ID_IFF_BYTERUN1 AV_CODEC_ID_IFF_ILBM
+ AV_CODEC_ID_KGV1,
+ AV_CODEC_ID_YOP,
+ AV_CODEC_ID_VP8,
+ AV_CODEC_ID_PICTOR,
+ AV_CODEC_ID_ANSI,
+ AV_CODEC_ID_A64_MULTI,
+ AV_CODEC_ID_A64_MULTI5,
+ AV_CODEC_ID_R10K,
+ AV_CODEC_ID_MXPEG,
+ AV_CODEC_ID_LAGARITH,
+ AV_CODEC_ID_PRORES,
+ AV_CODEC_ID_JV,
+ AV_CODEC_ID_DFA,
+ AV_CODEC_ID_WMV3IMAGE,
+ AV_CODEC_ID_VC1IMAGE,
+ AV_CODEC_ID_UTVIDEO,
+ AV_CODEC_ID_BMV_VIDEO,
+ AV_CODEC_ID_VBLE,
+ AV_CODEC_ID_DXTORY,
+ AV_CODEC_ID_V410,
+ AV_CODEC_ID_XWD,
+ AV_CODEC_ID_CDXL,
+ AV_CODEC_ID_XBM,
+ AV_CODEC_ID_ZEROCODEC,
+ AV_CODEC_ID_MSS1,
+ AV_CODEC_ID_MSA1,
+ AV_CODEC_ID_TSCC2,
+ AV_CODEC_ID_MTS2,
+ AV_CODEC_ID_CLLC,
+ AV_CODEC_ID_MSS2,
+ AV_CODEC_ID_VP9,
+ AV_CODEC_ID_AIC,
+ AV_CODEC_ID_ESCAPE130,
+ AV_CODEC_ID_G2M,
+ AV_CODEC_ID_WEBP,
+ AV_CODEC_ID_HNM4_VIDEO,
+ AV_CODEC_ID_HEVC,
+#define AV_CODEC_ID_H265 AV_CODEC_ID_HEVC
+ AV_CODEC_ID_FIC,
+ AV_CODEC_ID_ALIAS_PIX,
+ AV_CODEC_ID_BRENDER_PIX,
+ AV_CODEC_ID_PAF_VIDEO,
+ AV_CODEC_ID_EXR,
+ AV_CODEC_ID_VP7,
+ AV_CODEC_ID_SANM,
+ AV_CODEC_ID_SGIRLE,
+ AV_CODEC_ID_MVC1,
+ AV_CODEC_ID_MVC2,
+ AV_CODEC_ID_HQX,
+ AV_CODEC_ID_TDSC,
+ AV_CODEC_ID_HQ_HQA,
+ AV_CODEC_ID_HAP,
+ AV_CODEC_ID_DDS,
+ AV_CODEC_ID_DXV,
+ AV_CODEC_ID_SCREENPRESSO,
+ AV_CODEC_ID_RSCC,
+
+ AV_CODEC_ID_Y41P = 0x8000,
+ AV_CODEC_ID_AVRP,
+ AV_CODEC_ID_012V,
+ AV_CODEC_ID_AVUI,
+ AV_CODEC_ID_AYUV,
+ AV_CODEC_ID_TARGA_Y216,
+ AV_CODEC_ID_V308,
+ AV_CODEC_ID_V408,
+ AV_CODEC_ID_YUV4,
+ AV_CODEC_ID_AVRN,
+ AV_CODEC_ID_CPIA,
+ AV_CODEC_ID_XFACE,
+ AV_CODEC_ID_SNOW,
+ AV_CODEC_ID_SMVJPEG,
+ AV_CODEC_ID_APNG,
+ AV_CODEC_ID_DAALA,
+ AV_CODEC_ID_CFHD,
+ AV_CODEC_ID_TRUEMOTION2RT,
+ AV_CODEC_ID_M101,
+ AV_CODEC_ID_MAGICYUV,
+ AV_CODEC_ID_SHEERVIDEO,
+ AV_CODEC_ID_YLC,
+
+ /* various PCM "codecs" */
+ AV_CODEC_ID_FIRST_AUDIO = 0x10000, ///< A dummy id pointing at the start of audio codecs
+ AV_CODEC_ID_PCM_S16LE = 0x10000,
+ AV_CODEC_ID_PCM_S16BE,
+ AV_CODEC_ID_PCM_U16LE,
+ AV_CODEC_ID_PCM_U16BE,
+ AV_CODEC_ID_PCM_S8,
+ AV_CODEC_ID_PCM_U8,
+ AV_CODEC_ID_PCM_MULAW,
+ AV_CODEC_ID_PCM_ALAW,
+ AV_CODEC_ID_PCM_S32LE,
+ AV_CODEC_ID_PCM_S32BE,
+ AV_CODEC_ID_PCM_U32LE,
+ AV_CODEC_ID_PCM_U32BE,
+ AV_CODEC_ID_PCM_S24LE,
+ AV_CODEC_ID_PCM_S24BE,
+ AV_CODEC_ID_PCM_U24LE,
+ AV_CODEC_ID_PCM_U24BE,
+ AV_CODEC_ID_PCM_S24DAUD,
+ AV_CODEC_ID_PCM_ZORK,
+ AV_CODEC_ID_PCM_S16LE_PLANAR,
+ AV_CODEC_ID_PCM_DVD,
+ AV_CODEC_ID_PCM_F32BE,
+ AV_CODEC_ID_PCM_F32LE,
+ AV_CODEC_ID_PCM_F64BE,
+ AV_CODEC_ID_PCM_F64LE,
+ AV_CODEC_ID_PCM_BLURAY,
+ AV_CODEC_ID_PCM_LXF,
+ AV_CODEC_ID_S302M,
+ AV_CODEC_ID_PCM_S8_PLANAR,
+ AV_CODEC_ID_PCM_S24LE_PLANAR,
+ AV_CODEC_ID_PCM_S32LE_PLANAR,
+ AV_CODEC_ID_PCM_S16BE_PLANAR,
+ /* new PCM "codecs" should be added right below this line starting with
+ * an explicit value of for example 0x10800
+ */
+
+ /* various ADPCM codecs */
+ AV_CODEC_ID_ADPCM_IMA_QT = 0x11000,
+ AV_CODEC_ID_ADPCM_IMA_WAV,
+ AV_CODEC_ID_ADPCM_IMA_DK3,
+ AV_CODEC_ID_ADPCM_IMA_DK4,
+ AV_CODEC_ID_ADPCM_IMA_WS,
+ AV_CODEC_ID_ADPCM_IMA_SMJPEG,
+ AV_CODEC_ID_ADPCM_MS,
+ AV_CODEC_ID_ADPCM_4XM,
+ AV_CODEC_ID_ADPCM_XA,
+ AV_CODEC_ID_ADPCM_ADX,
+ AV_CODEC_ID_ADPCM_EA,
+ AV_CODEC_ID_ADPCM_G726,
+ AV_CODEC_ID_ADPCM_CT,
+ AV_CODEC_ID_ADPCM_SWF,
+ AV_CODEC_ID_ADPCM_YAMAHA,
+ AV_CODEC_ID_ADPCM_SBPRO_4,
+ AV_CODEC_ID_ADPCM_SBPRO_3,
+ AV_CODEC_ID_ADPCM_SBPRO_2,
+ AV_CODEC_ID_ADPCM_THP,
+ AV_CODEC_ID_ADPCM_IMA_AMV,
+ AV_CODEC_ID_ADPCM_EA_R1,
+ AV_CODEC_ID_ADPCM_EA_R3,
+ AV_CODEC_ID_ADPCM_EA_R2,
+ AV_CODEC_ID_ADPCM_IMA_EA_SEAD,
+ AV_CODEC_ID_ADPCM_IMA_EA_EACS,
+ AV_CODEC_ID_ADPCM_EA_XAS,
+ AV_CODEC_ID_ADPCM_EA_MAXIS_XA,
+ AV_CODEC_ID_ADPCM_IMA_ISS,
+ AV_CODEC_ID_ADPCM_G722,
+ AV_CODEC_ID_ADPCM_IMA_APC,
+ AV_CODEC_ID_ADPCM_VIMA,
+#if FF_API_VIMA_DECODER
+ AV_CODEC_ID_VIMA = AV_CODEC_ID_ADPCM_VIMA,
+#endif
+
+ AV_CODEC_ID_ADPCM_AFC = 0x11800,
+ AV_CODEC_ID_ADPCM_IMA_OKI,
+ AV_CODEC_ID_ADPCM_DTK,
+ AV_CODEC_ID_ADPCM_IMA_RAD,
+ AV_CODEC_ID_ADPCM_G726LE,
+ AV_CODEC_ID_ADPCM_THP_LE,
+ AV_CODEC_ID_ADPCM_PSX,
+ AV_CODEC_ID_ADPCM_AICA,
+ AV_CODEC_ID_ADPCM_IMA_DAT4,
+ AV_CODEC_ID_ADPCM_MTAF,
+
+ /* AMR */
+ AV_CODEC_ID_AMR_NB = 0x12000,
+ AV_CODEC_ID_AMR_WB,
+
+ /* RealAudio codecs*/
+ AV_CODEC_ID_RA_144 = 0x13000,
+ AV_CODEC_ID_RA_288,
+
+ /* various DPCM codecs */
+ AV_CODEC_ID_ROQ_DPCM = 0x14000,
+ AV_CODEC_ID_INTERPLAY_DPCM,
+ AV_CODEC_ID_XAN_DPCM,
+ AV_CODEC_ID_SOL_DPCM,
+
+ AV_CODEC_ID_SDX2_DPCM = 0x14800,
+
+ /* audio codecs */
+ AV_CODEC_ID_MP2 = 0x15000,
+ AV_CODEC_ID_MP3, ///< preferred ID for decoding MPEG audio layer 1, 2 or 3
+ AV_CODEC_ID_AAC,
+ AV_CODEC_ID_AC3,
+ AV_CODEC_ID_DTS,
+ AV_CODEC_ID_VORBIS,
+ AV_CODEC_ID_DVAUDIO,
+ AV_CODEC_ID_WMAV1,
+ AV_CODEC_ID_WMAV2,
+ AV_CODEC_ID_MACE3,
+ AV_CODEC_ID_MACE6,
+ AV_CODEC_ID_VMDAUDIO,
+ AV_CODEC_ID_FLAC,
+ AV_CODEC_ID_MP3ADU,
+ AV_CODEC_ID_MP3ON4,
+ AV_CODEC_ID_SHORTEN,
+ AV_CODEC_ID_ALAC,
+ AV_CODEC_ID_WESTWOOD_SND1,
+ AV_CODEC_ID_GSM, ///< as in Berlin toast format
+ AV_CODEC_ID_QDM2,
+ AV_CODEC_ID_COOK,
+ AV_CODEC_ID_TRUESPEECH,
+ AV_CODEC_ID_TTA,
+ AV_CODEC_ID_SMACKAUDIO,
+ AV_CODEC_ID_QCELP,
+ AV_CODEC_ID_WAVPACK,
+ AV_CODEC_ID_DSICINAUDIO,
+ AV_CODEC_ID_IMC,
+ AV_CODEC_ID_MUSEPACK7,
+ AV_CODEC_ID_MLP,
+ AV_CODEC_ID_GSM_MS, /* as found in WAV */
+ AV_CODEC_ID_ATRAC3,
+#if FF_API_VOXWARE
+ AV_CODEC_ID_VOXWARE,
+#endif
+ AV_CODEC_ID_APE,
+ AV_CODEC_ID_NELLYMOSER,
+ AV_CODEC_ID_MUSEPACK8,
+ AV_CODEC_ID_SPEEX,
+ AV_CODEC_ID_WMAVOICE,
+ AV_CODEC_ID_WMAPRO,
+ AV_CODEC_ID_WMALOSSLESS,
+ AV_CODEC_ID_ATRAC3P,
+ AV_CODEC_ID_EAC3,
+ AV_CODEC_ID_SIPR,
+ AV_CODEC_ID_MP1,
+ AV_CODEC_ID_TWINVQ,
+ AV_CODEC_ID_TRUEHD,
+ AV_CODEC_ID_MP4ALS,
+ AV_CODEC_ID_ATRAC1,
+ AV_CODEC_ID_BINKAUDIO_RDFT,
+ AV_CODEC_ID_BINKAUDIO_DCT,
+ AV_CODEC_ID_AAC_LATM,
+ AV_CODEC_ID_QDMC,
+ AV_CODEC_ID_CELT,
+ AV_CODEC_ID_G723_1,
+ AV_CODEC_ID_G729,
+ AV_CODEC_ID_8SVX_EXP,
+ AV_CODEC_ID_8SVX_FIB,
+ AV_CODEC_ID_BMV_AUDIO,
+ AV_CODEC_ID_RALF,
+ AV_CODEC_ID_IAC,
+ AV_CODEC_ID_ILBC,
+ AV_CODEC_ID_OPUS,
+ AV_CODEC_ID_COMFORT_NOISE,
+ AV_CODEC_ID_TAK,
+ AV_CODEC_ID_METASOUND,
+ AV_CODEC_ID_PAF_AUDIO,
+ AV_CODEC_ID_ON2AVC,
+ AV_CODEC_ID_DSS_SP,
+
+ AV_CODEC_ID_FFWAVESYNTH = 0x15800,
+ AV_CODEC_ID_SONIC,
+ AV_CODEC_ID_SONIC_LS,
+ AV_CODEC_ID_EVRC,
+ AV_CODEC_ID_SMV,
+ AV_CODEC_ID_DSD_LSBF,
+ AV_CODEC_ID_DSD_MSBF,
+ AV_CODEC_ID_DSD_LSBF_PLANAR,
+ AV_CODEC_ID_DSD_MSBF_PLANAR,
+ AV_CODEC_ID_4GV,
+ AV_CODEC_ID_INTERPLAY_ACM,
+ AV_CODEC_ID_XMA1,
+ AV_CODEC_ID_XMA2,
+ AV_CODEC_ID_DST,
+
+ /* subtitle codecs */
+ AV_CODEC_ID_FIRST_SUBTITLE = 0x17000, ///< A dummy ID pointing at the start of subtitle codecs.
+ AV_CODEC_ID_DVD_SUBTITLE = 0x17000,
+ AV_CODEC_ID_DVB_SUBTITLE,
+ AV_CODEC_ID_TEXT, ///< raw UTF-8 text
+ AV_CODEC_ID_XSUB,
+ AV_CODEC_ID_SSA,
+ AV_CODEC_ID_MOV_TEXT,
+ AV_CODEC_ID_HDMV_PGS_SUBTITLE,
+ AV_CODEC_ID_DVB_TELETEXT,
+ AV_CODEC_ID_SRT,
+
+ AV_CODEC_ID_MICRODVD = 0x17800,
+ AV_CODEC_ID_EIA_608,
+ AV_CODEC_ID_JACOSUB,
+ AV_CODEC_ID_SAMI,
+ AV_CODEC_ID_REALTEXT,
+ AV_CODEC_ID_STL,
+ AV_CODEC_ID_SUBVIEWER1,
+ AV_CODEC_ID_SUBVIEWER,
+ AV_CODEC_ID_SUBRIP,
+ AV_CODEC_ID_WEBVTT,
+ AV_CODEC_ID_MPL2,
+ AV_CODEC_ID_VPLAYER,
+ AV_CODEC_ID_PJS,
+ AV_CODEC_ID_ASS,
+ AV_CODEC_ID_HDMV_TEXT_SUBTITLE,
+
+ /* other specific kind of codecs (generally used for attachments) */
+ AV_CODEC_ID_FIRST_UNKNOWN = 0x18000, ///< A dummy ID pointing at the start of various fake codecs.
+ AV_CODEC_ID_TTF = 0x18000,
+
+ AV_CODEC_ID_BINTEXT = 0x18800,
+ AV_CODEC_ID_XBIN,
+ AV_CODEC_ID_IDF,
+ AV_CODEC_ID_OTF,
+ AV_CODEC_ID_SMPTE_KLV,
+ AV_CODEC_ID_DVD_NAV,
+ AV_CODEC_ID_TIMED_ID3,
+ AV_CODEC_ID_BIN_DATA,
+
+
+ AV_CODEC_ID_PROBE = 0x19000, ///< codec_id is not known (like AV_CODEC_ID_NONE) but lavf should attempt to identify it
+
+ AV_CODEC_ID_MPEG2TS = 0x20000, /**< _FAKE_ codec to indicate a raw MPEG-2 TS
+ * stream (only used by libavformat) */
+ AV_CODEC_ID_MPEG4SYSTEMS = 0x20001, /**< _FAKE_ codec to indicate a MPEG-4 Systems
+ * stream (only used by libavformat) */
+ AV_CODEC_ID_FFMETADATA = 0x21000, ///< Dummy codec for streams containing only metadata information.
+ AV_CODEC_ID_WRAPPED_AVFRAME = 0x21001, ///< Passthrough codec, AVFrames wrapped in AVPacket
+};
+
+/**
+ * This struct describes the properties of a single codec described by an
+ * AVCodecID.
+ * @see avcodec_descriptor_get()
+ */
+typedef struct AVCodecDescriptor {
+ enum AVCodecID id;
+ enum AVMediaType type;
+ /**
+ * Name of the codec described by this descriptor. It is non-empty and
+ * unique for each codec descriptor. It should contain alphanumeric
+ * characters and '_' only.
+ */
+ const char *name;
+ /**
+ * A more descriptive name for this codec. May be NULL.
+ */
+ const char *long_name;
+ /**
+ * Codec properties, a combination of AV_CODEC_PROP_* flags.
+ */
+ int props;
+ /**
+ * MIME type(s) associated with the codec.
+ * May be NULL; if not, a NULL-terminated array of MIME types.
+ * The first item is always non-NULL and is the preferred MIME type.
+ */
+ const char *const *mime_types;
+ /**
+ * If non-NULL, an array of profiles recognized for this codec.
+ * Terminated with FF_PROFILE_UNKNOWN.
+ */
+ const struct AVProfile *profiles;
+} AVCodecDescriptor;
+
+/**
+ * Codec uses only intra compression.
+ * Video codecs only.
+ */
+#define AV_CODEC_PROP_INTRA_ONLY (1 << 0)
+/**
+ * Codec supports lossy compression. Audio and video codecs only.
+ * @note a codec may support both lossy and lossless
+ * compression modes
+ */
+#define AV_CODEC_PROP_LOSSY (1 << 1)
+/**
+ * Codec supports lossless compression. Audio and video codecs only.
+ */
+#define AV_CODEC_PROP_LOSSLESS (1 << 2)
+/**
+ * Codec supports frame reordering. That is, the coded order (the order in which
+ * the encoded packets are output by the encoders / stored / input to the
+ * decoders) may be different from the presentation order of the corresponding
+ * frames.
+ *
+ * For codecs that do not have this property set, PTS and DTS should always be
+ * equal.
+ */
+#define AV_CODEC_PROP_REORDER (1 << 3)
+/**
+ * Subtitle codec is bitmap based
+ * Decoded AVSubtitle data can be read from the AVSubtitleRect->pict field.
+ */
+#define AV_CODEC_PROP_BITMAP_SUB (1 << 16)
+/**
+ * Subtitle codec is text based.
+ * Decoded AVSubtitle data can be read from the AVSubtitleRect->ass field.
+ */
+#define AV_CODEC_PROP_TEXT_SUB (1 << 17)
+
+/**
+ * @ingroup lavc_decoding
+ * Required number of additionally allocated bytes at the end of the input bitstream for decoding.
+ * This is mainly needed because some optimized bitstream readers read
+ * 32 or 64 bit at once and could read over the end.<br>
+ * Note: If the first 23 bits of the additional bytes are not 0, then damaged
+ * MPEG bitstreams could cause overread and segfault.
+ */
+#define AV_INPUT_BUFFER_PADDING_SIZE 32
+
+/**
+ * @ingroup lavc_encoding
+ * minimum encoding buffer size
+ * Used to avoid some checks during header writing.
+ */
+#define AV_INPUT_BUFFER_MIN_SIZE 16384
+
+#if FF_API_WITHOUT_PREFIX
+/**
+ * @deprecated use AV_INPUT_BUFFER_PADDING_SIZE instead
+ */
+#define FF_INPUT_BUFFER_PADDING_SIZE 32
+
+/**
+ * @deprecated use AV_INPUT_BUFFER_MIN_SIZE instead
+ */
+#define FF_MIN_BUFFER_SIZE 16384
+#endif /* FF_API_WITHOUT_PREFIX */
+
+/**
+ * @ingroup lavc_encoding
+ * motion estimation type.
+ * @deprecated use codec private option instead
+ */
+#if FF_API_MOTION_EST
+enum Motion_Est_ID {
+ ME_ZERO = 1, ///< no search, that is use 0,0 vector whenever one is needed
+ ME_FULL,
+ ME_LOG,
+ ME_PHODS,
+ ME_EPZS, ///< enhanced predictive zonal search
+ ME_X1, ///< reserved for experiments
+ ME_HEX, ///< hexagon based search
+ ME_UMH, ///< uneven multi-hexagon search
+ ME_TESA, ///< transformed exhaustive search algorithm
+ ME_ITER=50, ///< iterative search
+};
+#endif
+
+/**
+ * @ingroup lavc_decoding
+ */
+enum AVDiscard{
+ /* We leave some space between them for extensions (drop some
+ * keyframes for intra-only or drop just some bidir frames). */
+ AVDISCARD_NONE =-16, ///< discard nothing
+ AVDISCARD_DEFAULT = 0, ///< discard useless packets like 0 size packets in avi
+ AVDISCARD_NONREF = 8, ///< discard all non reference
+ AVDISCARD_BIDIR = 16, ///< discard all bidirectional frames
+ AVDISCARD_NONINTRA= 24, ///< discard all non intra frames
+ AVDISCARD_NONKEY = 32, ///< discard all frames except keyframes
+ AVDISCARD_ALL = 48, ///< discard all
+};
+
+enum AVAudioServiceType {
+ AV_AUDIO_SERVICE_TYPE_MAIN = 0,
+ AV_AUDIO_SERVICE_TYPE_EFFECTS = 1,
+ AV_AUDIO_SERVICE_TYPE_VISUALLY_IMPAIRED = 2,
+ AV_AUDIO_SERVICE_TYPE_HEARING_IMPAIRED = 3,
+ AV_AUDIO_SERVICE_TYPE_DIALOGUE = 4,
+ AV_AUDIO_SERVICE_TYPE_COMMENTARY = 5,
+ AV_AUDIO_SERVICE_TYPE_EMERGENCY = 6,
+ AV_AUDIO_SERVICE_TYPE_VOICE_OVER = 7,
+ AV_AUDIO_SERVICE_TYPE_KARAOKE = 8,
+ AV_AUDIO_SERVICE_TYPE_NB , ///< Not part of ABI
+};
+
+/**
+ * @ingroup lavc_encoding
+ */
+typedef struct RcOverride{
+ int start_frame;
+ int end_frame;
+ int qscale; // If this is 0 then quality_factor will be used instead.
+ float quality_factor;
+} RcOverride;
+
+#if FF_API_MAX_BFRAMES
+/**
+ * @deprecated there is no libavcodec-wide limit on the number of B-frames
+ */
+#define FF_MAX_B_FRAMES 16
+#endif
+
+/* encoding support
+   These flags can be passed in AVCodecContext.flags before initialization.
+   Note: Not everything is supported yet.
+*/
+
+/**
+ * Allow decoders to produce frames with data planes that are not aligned
+ * to CPU requirements (e.g. due to cropping).
+ */
+#define AV_CODEC_FLAG_UNALIGNED (1 << 0)
+/**
+ * Use fixed qscale.
+ */
+#define AV_CODEC_FLAG_QSCALE (1 << 1)
+/**
+ * 4 MV per MB allowed / advanced prediction for H.263.
+ */
+#define AV_CODEC_FLAG_4MV (1 << 2)
+/**
+ * Output even those frames that might be corrupted.
+ */
+#define AV_CODEC_FLAG_OUTPUT_CORRUPT (1 << 3)
+/**
+ * Use qpel MC.
+ */
+#define AV_CODEC_FLAG_QPEL (1 << 4)
+/**
+ * Use internal 2pass ratecontrol in first pass mode.
+ */
+#define AV_CODEC_FLAG_PASS1 (1 << 9)
+/**
+ * Use internal 2pass ratecontrol in second pass mode.
+ */
+#define AV_CODEC_FLAG_PASS2 (1 << 10)
+/**
+ * Use loop filter.
+ */
+#define AV_CODEC_FLAG_LOOP_FILTER (1 << 11)
+/**
+ * Only decode/encode grayscale.
+ */
+#define AV_CODEC_FLAG_GRAY (1 << 13)
+/**
+ * error[?] variables will be set during encoding.
+ */
+#define AV_CODEC_FLAG_PSNR (1 << 15)
+/**
+ * Input bitstream might be truncated at a random location
+ * instead of only at frame boundaries.
+ */
+#define AV_CODEC_FLAG_TRUNCATED (1 << 16)
+/**
+ * Use interlaced DCT.
+ */
+#define AV_CODEC_FLAG_INTERLACED_DCT (1 << 18)
+/**
+ * Force low delay.
+ */
+#define AV_CODEC_FLAG_LOW_DELAY (1 << 19)
+/**
+ * Place global headers in extradata instead of every keyframe.
+ */
+#define AV_CODEC_FLAG_GLOBAL_HEADER (1 << 22)
+/**
+ * Use only bitexact stuff (except (I)DCT).
+ */
+#define AV_CODEC_FLAG_BITEXACT (1 << 23)
+/* Fx : Flag for H.263+ extra options */
+/**
+ * H.263 advanced intra coding / MPEG-4 AC prediction
+ */
+#define AV_CODEC_FLAG_AC_PRED (1 << 24)
+/**
+ * interlaced motion estimation
+ */
+#define AV_CODEC_FLAG_INTERLACED_ME (1 << 29)
+/**
+ * Closed GOP.
+ */
+#define AV_CODEC_FLAG_CLOSED_GOP (1U << 31)
+
+/**
+ * Allow non spec compliant speedup tricks.
+ */
+#define AV_CODEC_FLAG2_FAST (1 << 0)
+/**
+ * Skip bitstream encoding.
+ */
+#define AV_CODEC_FLAG2_NO_OUTPUT (1 << 2)
+/**
+ * Place global headers at every keyframe instead of in extradata.
+ */
+#define AV_CODEC_FLAG2_LOCAL_HEADER (1 << 3)
+
+/**
+ * Timecode is in drop frame format.
+ * @deprecated
+ */
+#define AV_CODEC_FLAG2_DROP_FRAME_TIMECODE (1 << 13)
+
+/**
+ * Input bitstream might be truncated at a packet boundaries
+ * instead of only at frame boundaries.
+ */
+#define AV_CODEC_FLAG2_CHUNKS (1 << 15)
+/**
+ * Discard cropping information from SPS.
+ */
+#define AV_CODEC_FLAG2_IGNORE_CROP (1 << 16)
+
+/**
+ * Show all frames before the first keyframe
+ */
+#define AV_CODEC_FLAG2_SHOW_ALL (1 << 22)
+/**
+ * Export motion vectors through frame side data
+ */
+#define AV_CODEC_FLAG2_EXPORT_MVS (1 << 28)
+/**
+ * Do not skip samples and export skip information as frame side data
+ */
+#define AV_CODEC_FLAG2_SKIP_MANUAL (1 << 29)
+/**
+ * Do not reset ASS ReadOrder field on flush (subtitles decoding)
+ */
+#define AV_CODEC_FLAG2_RO_FLUSH_NOOP (1 << 30)
+
+/* Unsupported options :
+ * Syntax Arithmetic coding (SAC)
+ * Reference Picture Selection
+ * Independent Segment Decoding */
+/* /Fx */
+/* codec capabilities */
+
+/**
+ * Decoder can use draw_horiz_band callback.
+ */
+#define AV_CODEC_CAP_DRAW_HORIZ_BAND (1 << 0)
+/**
+ * Codec uses get_buffer() for allocating buffers and supports custom allocators.
+ * If not set, it might not use get_buffer() at all or use operations that
+ * assume the buffer was allocated by avcodec_default_get_buffer.
+ */
+#define AV_CODEC_CAP_DR1 (1 << 1)
+#define AV_CODEC_CAP_TRUNCATED (1 << 3)
+/**
+ * Encoder or decoder requires flushing with NULL input at the end in order to
+ * give the complete and correct output.
+ *
+ * NOTE: If this flag is not set, the codec is guaranteed to never be fed
+ *       with NULL data. The user can still send NULL data to the public encode
+ *       or decode function, but libavcodec will not pass it along to the codec
+ *       unless this flag is set.
+ *
+ * Decoders:
+ * The decoder has a non-zero delay and needs to be fed with avpkt->data=NULL,
+ * avpkt->size=0 at the end to get the delayed data until the decoder no longer
+ * returns frames.
+ *
+ * Encoders:
+ * The encoder needs to be fed with NULL data at the end of encoding until the
+ * encoder no longer returns data.
+ *
+ * NOTE: For encoders implementing the AVCodec.encode2() function, setting this
+ *       flag also means that the encoder must set the pts and duration for
+ *       each output packet. If this flag is not set, the pts and duration will
+ *       be determined by libavcodec from the input frame.
+ */
+#define AV_CODEC_CAP_DELAY (1 << 5)
+/**
+ * Codec can be fed a final frame with a smaller size.
+ * This can be used to prevent truncation of the last audio samples.
+ */
+#define AV_CODEC_CAP_SMALL_LAST_FRAME (1 << 6)
+
+#if FF_API_CAP_VDPAU
+/**
+ * Codec can export data for HW decoding (VDPAU).
+ */
+#define AV_CODEC_CAP_HWACCEL_VDPAU (1 << 7)
+#endif
+
+/**
+ * Codec can output multiple frames per AVPacket.
+ * Normally demuxers return one frame at a time, demuxers which do not do
+ * so are connected to a parser to split what they return into proper frames.
+ * This flag is reserved to the very rare category of codecs which have a
+ * bitstream that cannot be split into frames without time-consuming
+ * operations like full decoding. Demuxers carrying such bitstreams thus
+ * may return multiple frames in a packet. This has many disadvantages like
+ * prohibiting stream copy in many cases thus it should only be considered
+ * as a last resort.
+ */
+#define AV_CODEC_CAP_SUBFRAMES (1 << 8)
+/**
+ * Codec is experimental and is thus avoided in favor of non experimental
+ * encoders
+ */
+#define AV_CODEC_CAP_EXPERIMENTAL (1 << 9)
+/**
+ * Codec should fill in channel configuration and samplerate instead of container
+ */
+#define AV_CODEC_CAP_CHANNEL_CONF (1 << 10)
+/**
+ * Codec supports frame-level multithreading.
+ */
+#define AV_CODEC_CAP_FRAME_THREADS (1 << 12)
+/**
+ * Codec supports slice-based (or partition-based) multithreading.
+ */
+#define AV_CODEC_CAP_SLICE_THREADS (1 << 13)
+/**
+ * Codec supports changed parameters at any point.
+ */
+#define AV_CODEC_CAP_PARAM_CHANGE (1 << 14)
+/**
+ * Codec supports avctx->thread_count == 0 (auto).
+ */
+#define AV_CODEC_CAP_AUTO_THREADS (1 << 15)
+/**
+ * Audio encoder supports receiving a different number of samples in each call.
+ */
+#define AV_CODEC_CAP_VARIABLE_FRAME_SIZE (1 << 16)
+/**
+ * Codec is intra only.
+ */
+#define AV_CODEC_CAP_INTRA_ONLY 0x40000000
+/**
+ * Codec is lossless.
+ */
+#define AV_CODEC_CAP_LOSSLESS 0x80000000
+
+
+#if FF_API_WITHOUT_PREFIX
+/**
+ * Allow decoders to produce frames with data planes that are not aligned
+ * to CPU requirements (e.g. due to cropping).
+ */
+#define CODEC_FLAG_UNALIGNED AV_CODEC_FLAG_UNALIGNED
+#define CODEC_FLAG_QSCALE AV_CODEC_FLAG_QSCALE
+#define CODEC_FLAG_4MV AV_CODEC_FLAG_4MV
+#define CODEC_FLAG_OUTPUT_CORRUPT AV_CODEC_FLAG_OUTPUT_CORRUPT
+#define CODEC_FLAG_QPEL AV_CODEC_FLAG_QPEL
+#if FF_API_GMC
+/**
+ * @deprecated use the "gmc" private option of the libxvid encoder
+ */
+#define CODEC_FLAG_GMC 0x0020 ///< Use GMC.
+#endif
+#if FF_API_MV0
+/**
+ * @deprecated use the flag "mv0" in the "mpv_flags" private option of the
+ * mpegvideo encoders
+ */
+#define CODEC_FLAG_MV0 0x0040
+#endif
+#if FF_API_INPUT_PRESERVED
+/**
+ * @deprecated passing reference-counted frames to the encoders replaces this
+ * flag
+ */
+#define CODEC_FLAG_INPUT_PRESERVED 0x0100
+#endif
+#define CODEC_FLAG_PASS1 AV_CODEC_FLAG_PASS1
+#define CODEC_FLAG_PASS2 AV_CODEC_FLAG_PASS2
+#define CODEC_FLAG_GRAY AV_CODEC_FLAG_GRAY
+#if FF_API_EMU_EDGE
+/**
+ * @deprecated edges are not used/required anymore. I.e. this flag is now always
+ * set.
+ */
+#define CODEC_FLAG_EMU_EDGE 0x4000
+#endif
+#define CODEC_FLAG_PSNR AV_CODEC_FLAG_PSNR
+#define CODEC_FLAG_TRUNCATED AV_CODEC_FLAG_TRUNCATED
+
+#if FF_API_NORMALIZE_AQP
+/**
+ * @deprecated use the flag "naq" in the "mpv_flags" private option of the
+ * mpegvideo encoders
+ */
+#define CODEC_FLAG_NORMALIZE_AQP 0x00020000
+#endif
+#define CODEC_FLAG_INTERLACED_DCT AV_CODEC_FLAG_INTERLACED_DCT
+#define CODEC_FLAG_LOW_DELAY AV_CODEC_FLAG_LOW_DELAY
+#define CODEC_FLAG_GLOBAL_HEADER AV_CODEC_FLAG_GLOBAL_HEADER
+#define CODEC_FLAG_BITEXACT AV_CODEC_FLAG_BITEXACT
+#define CODEC_FLAG_AC_PRED AV_CODEC_FLAG_AC_PRED
+#define CODEC_FLAG_LOOP_FILTER AV_CODEC_FLAG_LOOP_FILTER
+#define CODEC_FLAG_INTERLACED_ME AV_CODEC_FLAG_INTERLACED_ME
+#define CODEC_FLAG_CLOSED_GOP AV_CODEC_FLAG_CLOSED_GOP
+#define CODEC_FLAG2_FAST AV_CODEC_FLAG2_FAST
+#define CODEC_FLAG2_NO_OUTPUT AV_CODEC_FLAG2_NO_OUTPUT
+#define CODEC_FLAG2_LOCAL_HEADER AV_CODEC_FLAG2_LOCAL_HEADER
+#define CODEC_FLAG2_DROP_FRAME_TIMECODE AV_CODEC_FLAG2_DROP_FRAME_TIMECODE
+#define CODEC_FLAG2_IGNORE_CROP AV_CODEC_FLAG2_IGNORE_CROP
+
+#define CODEC_FLAG2_CHUNKS AV_CODEC_FLAG2_CHUNKS
+#define CODEC_FLAG2_SHOW_ALL AV_CODEC_FLAG2_SHOW_ALL
+#define CODEC_FLAG2_EXPORT_MVS AV_CODEC_FLAG2_EXPORT_MVS
+#define CODEC_FLAG2_SKIP_MANUAL AV_CODEC_FLAG2_SKIP_MANUAL
+
+/* Unsupported options :
+ * Syntax Arithmetic coding (SAC)
+ * Reference Picture Selection
+ * Independent Segment Decoding */
+/* /Fx */
+/* codec capabilities */
+
+#define CODEC_CAP_DRAW_HORIZ_BAND AV_CODEC_CAP_DRAW_HORIZ_BAND ///< Decoder can use draw_horiz_band callback.
+/**
+ * Codec uses get_buffer() for allocating buffers and supports custom allocators.
+ * If not set, it might not use get_buffer() at all or use operations that
+ * assume the buffer was allocated by avcodec_default_get_buffer.
+ */
+#define CODEC_CAP_DR1 AV_CODEC_CAP_DR1
+#define CODEC_CAP_TRUNCATED AV_CODEC_CAP_TRUNCATED
+#if FF_API_XVMC
+/* Codec can export data for HW decoding. This flag indicates that
+ * the codec would call get_format() with a list that might contain HW accelerated
+ * pixel formats (XvMC, VDPAU, VAAPI, etc). The application can pick any of them
+ * including raw image format.
+ * The application can use the passed context to determine bitstream version,
+ * chroma format, resolution etc.
+ */
+#define CODEC_CAP_HWACCEL 0x0010
+#endif /* FF_API_XVMC */
+/**
+ * Encoder or decoder requires flushing with NULL input at the end in order to
+ * give the complete and correct output.
+ *
+ * NOTE: If this flag is not set, the codec is guaranteed to never be fed
+ *       with NULL data. The user can still send NULL data to the public encode
+ *       or decode function, but libavcodec will not pass it along to the codec
+ *       unless this flag is set.
+ *
+ * Decoders:
+ * The decoder has a non-zero delay and needs to be fed with avpkt->data=NULL,
+ * avpkt->size=0 at the end to get the delayed data until the decoder no longer
+ * returns frames.
+ *
+ * Encoders:
+ * The encoder needs to be fed with NULL data at the end of encoding until the
+ * encoder no longer returns data.
+ *
+ * NOTE: For encoders implementing the AVCodec.encode2() function, setting this
+ *       flag also means that the encoder must set the pts and duration for
+ *       each output packet. If this flag is not set, the pts and duration will
+ *       be determined by libavcodec from the input frame.
+ */
+#define CODEC_CAP_DELAY AV_CODEC_CAP_DELAY
+/**
+ * Codec can be fed a final frame with a smaller size.
+ * This can be used to prevent truncation of the last audio samples.
+ */
+#define CODEC_CAP_SMALL_LAST_FRAME AV_CODEC_CAP_SMALL_LAST_FRAME
+#if FF_API_CAP_VDPAU
+/**
+ * Codec can export data for HW decoding (VDPAU).
+ */
+#define CODEC_CAP_HWACCEL_VDPAU AV_CODEC_CAP_HWACCEL_VDPAU
+#endif
+/**
+ * Codec can output multiple frames per AVPacket.
+ * Normally demuxers return one frame at a time, demuxers which do not do
+ * so are connected to a parser to split what they return into proper frames.
+ * This flag is reserved to the very rare category of codecs which have a
+ * bitstream that cannot be split into frames without time-consuming
+ * operations like full decoding. Demuxers carrying such bitstreams thus
+ * may return multiple frames in a packet. This has many disadvantages like
+ * prohibiting stream copy in many cases thus it should only be considered
+ * as a last resort.
+ */
+#define CODEC_CAP_SUBFRAMES AV_CODEC_CAP_SUBFRAMES
+/**
+ * Codec is experimental and is thus avoided in favor of non experimental
+ * encoders
+ */
+#define CODEC_CAP_EXPERIMENTAL AV_CODEC_CAP_EXPERIMENTAL
+/**
+ * Codec should fill in channel configuration and samplerate instead of container
+ */
+#define CODEC_CAP_CHANNEL_CONF AV_CODEC_CAP_CHANNEL_CONF
+#if FF_API_NEG_LINESIZES
+/**
+ * @deprecated no codecs use this capability
+ */
+#define CODEC_CAP_NEG_LINESIZES 0x0800
+#endif
+/**
+ * Codec supports frame-level multithreading.
+ */
+#define CODEC_CAP_FRAME_THREADS AV_CODEC_CAP_FRAME_THREADS
+/**
+ * Codec supports slice-based (or partition-based) multithreading.
+ */
+#define CODEC_CAP_SLICE_THREADS AV_CODEC_CAP_SLICE_THREADS
+/**
+ * Codec supports changed parameters at any point.
+ */
+#define CODEC_CAP_PARAM_CHANGE AV_CODEC_CAP_PARAM_CHANGE
+/**
+ * Codec supports avctx->thread_count == 0 (auto).
+ */
+#define CODEC_CAP_AUTO_THREADS AV_CODEC_CAP_AUTO_THREADS
+/**
+ * Audio encoder supports receiving a different number of samples in each call.
+ */
+#define CODEC_CAP_VARIABLE_FRAME_SIZE AV_CODEC_CAP_VARIABLE_FRAME_SIZE
+/**
+ * Codec is intra only.
+ */
+#define CODEC_CAP_INTRA_ONLY AV_CODEC_CAP_INTRA_ONLY
+/**
+ * Codec is lossless.
+ */
+#define CODEC_CAP_LOSSLESS AV_CODEC_CAP_LOSSLESS
+
+/**
+ * HWAccel is experimental and is thus avoided in favor of non experimental
+ * codecs
+ */
+#define HWACCEL_CODEC_CAP_EXPERIMENTAL 0x0200
+#endif /* FF_API_WITHOUT_PREFIX */
+
+#if FF_API_MB_TYPE
+// The following defines may change, don't expect compatibility if you use them.
+// NOTE(review): guarded by FF_API_MB_TYPE — presumably slated for removal; verify before use.
+#define MB_TYPE_INTRA4x4 0x0001
+#define MB_TYPE_INTRA16x16 0x0002 //FIXME H.264-specific
+#define MB_TYPE_INTRA_PCM 0x0004 //FIXME H.264-specific
+#define MB_TYPE_16x16 0x0008
+#define MB_TYPE_16x8 0x0010
+#define MB_TYPE_8x16 0x0020
+#define MB_TYPE_8x8 0x0040
+#define MB_TYPE_INTERLACED 0x0080
+#define MB_TYPE_DIRECT2 0x0100 //FIXME
+#define MB_TYPE_ACPRED 0x0200
+#define MB_TYPE_GMC 0x0400
+#define MB_TYPE_SKIP 0x0800
+#define MB_TYPE_P0L0 0x1000
+#define MB_TYPE_P1L0 0x2000
+#define MB_TYPE_P0L1 0x4000
+#define MB_TYPE_P1L1 0x8000
+#define MB_TYPE_L0 (MB_TYPE_P0L0 | MB_TYPE_P1L0)
+#define MB_TYPE_L1 (MB_TYPE_P0L1 | MB_TYPE_P1L1)
+#define MB_TYPE_L0L1 (MB_TYPE_L0 | MB_TYPE_L1)
+#define MB_TYPE_QUANT 0x00010000
+#define MB_TYPE_CBP 0x00020000
+// Note bits 24-31 are reserved for codec specific use (H.264 ref0, MPEG-1 0mv, ...)
+#endif
+
+/**
+ * Pan Scan area.
+ * This specifies the area which should be displayed.
+ * Note there may be multiple such areas for one frame.
+ */
+typedef struct AVPanScan{
+    /**
+     * id of this pan scan area
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+    int id;
+
+    /**
+     * width and height in 1/16 pel
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+    int width;
+    int height;
+
+    /**
+     * position of the top left corner in 1/16 pel for up to 3 fields/frames
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+    int16_t position[3][2];
+}AVPanScan;
+
+/**
+ * This structure describes the bitrate properties of an encoded bitstream. It
+ * roughly corresponds to a subset of the VBV parameters for MPEG-2 or HRD
+ * parameters for H.264/HEVC.
+ */
+typedef struct AVCPBProperties {
+    /**
+     * Maximum bitrate of the stream, in bits per second.
+     * Zero if unknown or unspecified.
+     */
+    int max_bitrate;
+    /**
+     * Minimum bitrate of the stream, in bits per second.
+     * Zero if unknown or unspecified.
+     */
+    int min_bitrate;
+    /**
+     * Average bitrate of the stream, in bits per second.
+     * Zero if unknown or unspecified.
+     */
+    int avg_bitrate;
+
+    /**
+     * The size of the buffer to which the ratecontrol is applied, in bits.
+     * Zero if unknown or unspecified.
+     */
+    int buffer_size;
+
+    /**
+     * The delay between the time the packet this structure is associated with
+     * is received and the time when it should be decoded, in periods of a 27MHz
+     * clock.
+     *
+     * UINT64_MAX when unknown or unspecified.
+     */
+    uint64_t vbv_delay;
+} AVCPBProperties;
+
+#if FF_API_QSCALE_TYPE
+// NOTE(review): guarded by FF_API_QSCALE_TYPE — presumably deprecated; verify before use.
+#define FF_QSCALE_TYPE_MPEG1 0
+#define FF_QSCALE_TYPE_MPEG2 1
+#define FF_QSCALE_TYPE_H264 2
+#define FF_QSCALE_TYPE_VP56 3
+#endif
+
+/**
+ * The decoder will keep a reference to the frame and may reuse it later.
+ */
+#define AV_GET_BUFFER_FLAG_REF (1 << 0)
+
+/**
+ * @defgroup lavc_packet AVPacket
+ *
+ * Types and functions for working with AVPacket.
+ * @{
+ */
+enum AVPacketSideDataType {
+    AV_PKT_DATA_PALETTE,
+    AV_PKT_DATA_NEW_EXTRADATA,
+
+    /**
+     * An AV_PKT_DATA_PARAM_CHANGE side data packet is laid out as follows:
+     * @code
+     * u32le param_flags
+     * if (param_flags & AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_COUNT)
+     *     s32le channel_count
+     * if (param_flags & AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_LAYOUT)
+     *     u64le channel_layout
+     * if (param_flags & AV_SIDE_DATA_PARAM_CHANGE_SAMPLE_RATE)
+     *     s32le sample_rate
+     * if (param_flags & AV_SIDE_DATA_PARAM_CHANGE_DIMENSIONS)
+     *     s32le width
+     *     s32le height
+     * @endcode
+     */
+    AV_PKT_DATA_PARAM_CHANGE,
+
+    /**
+     * An AV_PKT_DATA_H263_MB_INFO side data packet contains a number of
+     * structures with info about macroblocks relevant to splitting the
+     * packet into smaller packets on macroblock edges (e.g. as for RFC 2190).
+     * That is, it does not necessarily contain info about all macroblocks,
+     * as long as the distance between macroblocks in the info is smaller
+     * than the target payload size.
+     * Each MB info structure is 12 bytes, and is laid out as follows:
+     * @code
+     * u32le bit offset from the start of the packet
+     * u8    current quantizer at the start of the macroblock
+     * u8    GOB number
+     * u16le macroblock address within the GOB
+     * u8    horizontal MV predictor
+     * u8    vertical MV predictor
+     * u8    horizontal MV predictor for block number 3
+     * u8    vertical MV predictor for block number 3
+     * @endcode
+     */
+    AV_PKT_DATA_H263_MB_INFO,
+
+    /**
+     * This side data should be associated with an audio stream and contains
+     * ReplayGain information in form of the AVReplayGain struct.
+     */
+    AV_PKT_DATA_REPLAYGAIN,
+
+    /**
+     * This side data contains a 3x3 transformation matrix describing an affine
+     * transformation that needs to be applied to the decoded video frames for
+     * correct presentation.
+     *
+     * See libavutil/display.h for a detailed description of the data.
+     */
+    AV_PKT_DATA_DISPLAYMATRIX,
+
+    /**
+     * This side data should be associated with a video stream and contains
+     * Stereoscopic 3D information in form of the AVStereo3D struct.
+     */
+    AV_PKT_DATA_STEREO3D,
+
+    /**
+     * This side data should be associated with an audio stream and corresponds
+     * to enum AVAudioServiceType.
+     */
+    AV_PKT_DATA_AUDIO_SERVICE_TYPE,
+
+    /**
+     * This side data contains quality related information from the encoder.
+     * @code
+     * u32le quality factor of the compressed frame. Allowed range is between 1 (good) and FF_LAMBDA_MAX (bad).
+     * u8    picture type
+     * u8    error count
+     * u16   reserved
+     * u64le[error count] sum of squared differences between encoder in and output
+     * @endcode
+     */
+    AV_PKT_DATA_QUALITY_STATS,
+
+    /**
+     * This side data contains an integer value representing the stream index
+     * of a "fallback" track. A fallback track indicates an alternate
+     * track to use when the current track can not be decoded for some reason.
+     * e.g. no decoder available for codec.
+     */
+    AV_PKT_DATA_FALLBACK_TRACK,
+
+    /**
+     * This side data corresponds to the AVCPBProperties struct.
+     */
+    AV_PKT_DATA_CPB_PROPERTIES,
+
+    /**
+     * Recommends skipping the specified number of samples
+     * @code
+     * u32le number of samples to skip from start of this packet
+     * u32le number of samples to skip from end of this packet
+     * u8    reason for start skip
+     * u8    reason for end   skip (0=padding silence, 1=convergence)
+     * @endcode
+     */
+    AV_PKT_DATA_SKIP_SAMPLES=70,
+
+    /**
+     * An AV_PKT_DATA_JP_DUALMONO side data packet indicates that
+     * the packet may contain "dual mono" audio specific to Japanese DTV
+     * and if it is true, recommends only the selected channel to be used.
+     * @code
+     * u8    selected channels (0=main/left, 1=sub/right, 2=both)
+     * @endcode
+     */
+    AV_PKT_DATA_JP_DUALMONO,
+
+    /**
+     * A list of zero terminated key/value strings. There is no end marker for
+     * the list, so it is required to rely on the side data size to stop.
+     */
+    AV_PKT_DATA_STRINGS_METADATA,
+
+    /**
+     * Subtitle event position
+     * @code
+     * u32le x1
+     * u32le y1
+     * u32le x2
+     * u32le y2
+     * @endcode
+     */
+    AV_PKT_DATA_SUBTITLE_POSITION,
+
+    /**
+     * Data found in BlockAdditional element of matroska container. There is
+     * no end marker for the data, so it is required to rely on the side data
+     * size to recognize the end. 8 byte id (as found in BlockAddId) followed
+     * by data.
+     */
+    AV_PKT_DATA_MATROSKA_BLOCKADDITIONAL,
+
+    /**
+     * The optional first identifier line of a WebVTT cue.
+     */
+    AV_PKT_DATA_WEBVTT_IDENTIFIER,
+
+    /**
+     * The optional settings (rendering instructions) that immediately
+     * follow the timestamp specifier of a WebVTT cue.
+     */
+    AV_PKT_DATA_WEBVTT_SETTINGS,
+
+    /**
+     * A list of zero terminated key/value strings. There is no end marker for
+     * the list, so it is required to rely on the side data size to stop. This
+     * side data includes updated metadata which appeared in the stream.
+     */
+    AV_PKT_DATA_METADATA_UPDATE,
+
+    /**
+     * MPEGTS stream ID, this is required to pass the stream ID
+     * information from the demuxer to the corresponding muxer.
+     */
+    AV_PKT_DATA_MPEGTS_STREAM_ID,
+
+    /**
+     * Mastering display metadata (based on SMPTE-2086:2014). This metadata
+     * should be associated with a video stream and contains data in the form
+     * of the AVMasteringDisplayMetadata struct.
+     */
+    AV_PKT_DATA_MASTERING_DISPLAY_METADATA
+};
+
+#define AV_PKT_DATA_QUALITY_FACTOR AV_PKT_DATA_QUALITY_STATS //DEPRECATED
+
+/**
+ * A single side data entry attached to an AVPacket.
+ */
+typedef struct AVPacketSideData {
+    uint8_t *data;                  ///< side data payload
+    int      size;                  ///< size of data in bytes
+    enum AVPacketSideDataType type; ///< type of the side data
+} AVPacketSideData;
+
+/**
+ * This structure stores compressed data. It is typically exported by demuxers
+ * and then passed as input to decoders, or received as output from encoders and
+ * then passed to muxers.
+ *
+ * For video, it should typically contain one compressed frame. For audio it may
+ * contain several compressed frames. Encoders are allowed to output empty
+ * packets, with no compressed data, containing only side data
+ * (e.g. to update some stream parameters at the end of encoding).
+ *
+ * AVPacket is one of the few structs in FFmpeg whose size is a part of public
+ * ABI. Thus it may be allocated on stack and no new fields can be added to it
+ * without libavcodec and libavformat major bump.
+ *
+ * The semantics of data ownership depends on the buf field.
+ * If it is set, the packet data is dynamically allocated and is
+ * valid indefinitely until a call to av_packet_unref() reduces the
+ * reference count to 0.
+ *
+ * If the buf field is not set av_packet_ref() would make a copy instead
+ * of increasing the reference count.
+ *
+ * The side data is always allocated with av_malloc(), copied by
+ * av_packet_ref() and freed by av_packet_unref().
+ *
+ * @see av_packet_ref
+ * @see av_packet_unref
+ */
+typedef struct AVPacket {
+    /**
+     * A reference to the reference-counted buffer where the packet data is
+     * stored.
+     * May be NULL, then the packet data is not reference-counted.
+     */
+    AVBufferRef *buf;
+    /**
+     * Presentation timestamp in AVStream->time_base units; the time at which
+     * the decompressed packet will be presented to the user.
+     * Can be AV_NOPTS_VALUE if it is not stored in the file.
+     * pts MUST be larger or equal to dts as presentation cannot happen before
+     * decompression, unless one wants to view hex dumps. Some formats misuse
+     * the terms dts and pts/cts to mean something different. Such timestamps
+     * must be converted to true pts/dts before they are stored in AVPacket.
+     */
+    int64_t pts;
+    /**
+     * Decompression timestamp in AVStream->time_base units; the time at which
+     * the packet is decompressed.
+     * Can be AV_NOPTS_VALUE if it is not stored in the file.
+     */
+    int64_t dts;
+    uint8_t *data;    ///< the compressed data this packet carries
+    int size;         ///< size of data in bytes
+    int stream_index; ///< index of the stream this packet belongs to
+    /**
+     * A combination of AV_PKT_FLAG values
+     */
+    int flags;
+    /**
+     * Additional packet data that can be provided by the container.
+     * Packet can contain several types of side information.
+     */
+    AVPacketSideData *side_data;
+    int side_data_elems;
+
+    /**
+     * Duration of this packet in AVStream->time_base units, 0 if unknown.
+     * Equals next_pts - this_pts in presentation order.
+     */
+    int64_t duration;
+
+    int64_t pos; ///< byte position in stream, -1 if unknown
+
+#if FF_API_CONVERGENCE_DURATION
+    /**
+     * @deprecated Same as the duration field, but as int64_t. This was required
+     * for Matroska subtitles, whose duration values could overflow when the
+     * duration field was still an int.
+     */
+    attribute_deprecated
+    int64_t convergence_duration;
+#endif
+} AVPacket;
+#define AV_PKT_FLAG_KEY     0x0001 ///< The packet contains a keyframe
+#define AV_PKT_FLAG_CORRUPT 0x0002 ///< The packet content is corrupted
+
+/**
+ * Flags indicating which parameters are present in an
+ * AV_PKT_DATA_PARAM_CHANGE side data packet (see its documented layout).
+ */
+enum AVSideDataParamChangeFlags {
+    AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_COUNT  = 0x0001,
+    AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_LAYOUT = 0x0002,
+    AV_SIDE_DATA_PARAM_CHANGE_SAMPLE_RATE    = 0x0004,
+    AV_SIDE_DATA_PARAM_CHANGE_DIMENSIONS     = 0x0008,
+};
+/**
+ * @}
+ */
+
+struct AVCodecInternal;
+
+enum AVFieldOrder {
+    AV_FIELD_UNKNOWN,
+    AV_FIELD_PROGRESSIVE,
+    AV_FIELD_TT,          ///< Top coded first, top displayed first
+    AV_FIELD_BB,          ///< Bottom coded first, bottom displayed first
+    AV_FIELD_TB,          ///< Top coded first, bottom displayed first
+    AV_FIELD_BT,          ///< Bottom coded first, top displayed first
+};
+
+/**
+ * main external API structure.
+ * New fields can be added to the end with minor version bumps.
+ * Removal, reordering and changes to existing fields require a major
+ * version bump.
+ * Please use AVOptions (av_opt* / av_set/get*()) to access these fields from user
+ * applications.
+ * The name string for AVOptions options matches the associated command line
+ * parameter name and can be found in libavcodec/options_table.h
+ * The AVOption/command line parameter names differ in some cases from the C
+ * structure field names for historic reasons or brevity.
+ * sizeof(AVCodecContext) must not be used outside libav*.
+ */
+typedef struct AVCodecContext {
+ /**
+ * information on struct for av_log
+ * - set by avcodec_alloc_context3
+ */
+ const AVClass *av_class;
+ int log_level_offset;
+
+ enum AVMediaType codec_type; /* see AVMEDIA_TYPE_xxx */
+ const struct AVCodec *codec;
+#if FF_API_CODEC_NAME
+ /**
+ * @deprecated this field is not used for anything in libavcodec
+ */
+ attribute_deprecated
+ char codec_name[32];
+#endif
+ enum AVCodecID codec_id; /* see AV_CODEC_ID_xxx */
+
+ /**
+ * fourcc (LSB first, so "ABCD" -> ('D'<<24) + ('C'<<16) + ('B'<<8) + 'A').
+ * This is used to work around some encoder bugs.
+ * A demuxer should set this to what is stored in the field used to identify the codec.
+ * If there are multiple such fields in a container then the demuxer should choose the one
+ * which maximizes the information about the used codec.
+ * If the codec tag field in a container is larger than 32 bits then the demuxer should
+ * remap the longer ID to 32 bits with a table or other structure. Alternatively a new
+ * extra_codec_tag + size could be added but for this a clear advantage must be demonstrated
+ * first.
+ * - encoding: Set by user, if not then the default based on codec_id will be used.
+ * - decoding: Set by user, will be converted to uppercase by libavcodec during init.
+ */
+ unsigned int codec_tag;
+
+#if FF_API_STREAM_CODEC_TAG
+ /**
+ * @deprecated this field is unused
+ */
+ attribute_deprecated
+ unsigned int stream_codec_tag;
+#endif
+
+ void *priv_data;
+
+ /**
+ * Private context used for internal data.
+ *
+ * Unlike priv_data, this is not codec-specific. It is used in general
+ * libavcodec functions.
+ */
+ struct AVCodecInternal *internal;
+
+ /**
+ * Private data of the user, can be used to carry app specific stuff.
+ * - encoding: Set by user.
+ * - decoding: Set by user.
+ */
+ void *opaque;
+
+ /**
+ * the average bitrate
+ * - encoding: Set by user; unused for constant quantizer encoding.
+ * - decoding: Set by user, may be overwritten by libavcodec
+ * if this info is available in the stream
+ */
+ int64_t bit_rate;
+
+ /**
+ * number of bits the bitstream is allowed to diverge from the reference.
+ * the reference can be CBR (for CBR pass1) or VBR (for pass2)
+ * - encoding: Set by user; unused for constant quantizer encoding.
+ * - decoding: unused
+ */
+ int bit_rate_tolerance;
+
+ /**
+ * Global quality for codecs which cannot change it per frame.
+ * This should be proportional to MPEG-1/2/4 qscale.
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ int global_quality;
+
+ /**
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ int compression_level;
+#define FF_COMPRESSION_DEFAULT -1
+
+ /**
+ * AV_CODEC_FLAG_*.
+ * - encoding: Set by user.
+ * - decoding: Set by user.
+ */
+ int flags;
+
+ /**
+ * AV_CODEC_FLAG2_*
+ * - encoding: Set by user.
+ * - decoding: Set by user.
+ */
+ int flags2;
+
+ /**
+ * some codecs need / can use extradata like Huffman tables.
+ * MJPEG: Huffman tables
+ * rv10: additional flags
+ * MPEG-4: global headers (they can be in the bitstream or here)
+ * The allocated memory should be AV_INPUT_BUFFER_PADDING_SIZE bytes larger
+ * than extradata_size to avoid problems if it is read with the bitstream reader.
+ * The bytewise contents of extradata must not depend on the architecture or CPU endianness.
+ * - encoding: Set/allocated/freed by libavcodec.
+ * - decoding: Set/allocated/freed by user.
+ */
+ uint8_t *extradata;
+ int extradata_size;
+
+ /**
+ * This is the fundamental unit of time (in seconds) in terms
+ * of which frame timestamps are represented. For fixed-fps content,
+ * timebase should be 1/framerate and timestamp increments should be
+ * identically 1.
+ * This often, but not always is the inverse of the frame rate or field rate
+ * for video. 1/time_base is not the average frame rate if the frame rate is not
+ * constant.
+ *
+ * Like containers, elementary streams also can store timestamps, 1/time_base
+ * is the unit in which these timestamps are specified.
+ * As example of such codec time base see ISO/IEC 14496-2:2001(E)
+ * vop_time_increment_resolution and fixed_vop_rate
+ * (fixed_vop_rate == 0 implies that it is different from the framerate)
+ *
+ * - encoding: MUST be set by user.
+ * - decoding: the use of this field for decoding is deprecated.
+ * Use framerate instead.
+ */
+ AVRational time_base;
+
+ /**
+ * For some codecs, the time base is closer to the field rate than the frame rate.
+ * Most notably, H.264 and MPEG-2 specify time_base as half of frame duration
+ * if no telecine is used ...
+ *
+ * Set to time_base ticks per frame. Default 1, e.g., H.264/MPEG-2 set it to 2.
+ */
+ int ticks_per_frame;
+
+ /**
+ * Codec delay.
+ *
+ * Encoding: Number of frames delay there will be from the encoder input to
+ * the decoder output. (we assume the decoder matches the spec)
+ * Decoding: Number of frames delay in addition to what a standard decoder
+ * as specified in the spec would produce.
+ *
+ * Video:
+ * Number of frames the decoded output will be delayed relative to the
+ * encoded input.
+ *
+ * Audio:
+ * For encoding, this field is unused (see initial_padding).
+ *
+ * For decoding, this is the number of samples the decoder needs to
+ * output before the decoder's output is valid. When seeking, you should
+ * start decoding this many samples prior to your desired seek point.
+ *
+ * - encoding: Set by libavcodec.
+ * - decoding: Set by libavcodec.
+ */
+ int delay;
+
+
+ /* video only */
+ /**
+ * picture width / height.
+ *
+ * @note Those fields may not match the values of the last
+ * AVFrame output by avcodec_decode_video2 due frame
+ * reordering.
+ *
+ * - encoding: MUST be set by user.
+ * - decoding: May be set by the user before opening the decoder if known e.g.
+ * from the container. Some decoders will require the dimensions
+ * to be set by the caller. During decoding, the decoder may
+ * overwrite those values as required while parsing the data.
+ */
+ int width, height;
+
+ /**
+ * Bitstream width / height, may be different from width/height e.g. when
+ * the decoded frame is cropped before being output or lowres is enabled.
+ *
+ * @note Those fields may not match the values of the last
+ * AVFrame output by avcodec_receive_frame() due to frame
+ * reordering.
+ *
+ * - encoding: unused
+ * - decoding: May be set by the user before opening the decoder if known
+ * e.g. from the container. During decoding, the decoder may
+ * overwrite those values as required while parsing the data.
+ */
+ int coded_width, coded_height;
+
+#if FF_API_ASPECT_EXTENDED
+#define FF_ASPECT_EXTENDED 15
+#endif
+
+ /**
+ * the number of pictures in a group of pictures, or 0 for intra_only
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ int gop_size;
+
+ /**
+ * Pixel format, see AV_PIX_FMT_xxx.
+ * May be set by the demuxer if known from headers.
+ * May be overridden by the decoder if it knows better.
+ *
+ * @note This field may not match the value of the last
+ * AVFrame output by avcodec_receive_frame() due to frame
+ * reordering.
+ *
+ * - encoding: Set by user.
+ * - decoding: Set by user if known, overridden by libavcodec while
+ * parsing the data.
+ */
+ enum AVPixelFormat pix_fmt;
+
+#if FF_API_MOTION_EST
+ /**
+ * This option does nothing
+ * @deprecated use codec private options instead
+ */
+ attribute_deprecated int me_method;
+#endif
+
+ /**
+ * If non NULL, 'draw_horiz_band' is called by the libavcodec
+ * decoder to draw a horizontal band. It improves cache usage. Not
+ * all codecs can do that. You must check the codec capabilities
+ * beforehand.
+ * When multithreading is used, it may be called from multiple threads
+ * at the same time; threads might draw different parts of the same AVFrame,
+ * or multiple AVFrames, and there is no guarantee that slices will be drawn
+ * in order.
+ * The function is also used by hardware acceleration APIs.
+ * It is called at least once during frame decoding to pass
+ * the data needed for hardware render.
+ * In that mode instead of pixel data, AVFrame points to
+ * a structure specific to the acceleration API. The application
+ * reads the structure and can change some fields to indicate progress
+ * or mark state.
+ * - encoding: unused
+ * - decoding: Set by user.
+ * @param height the height of the slice
+ * @param y the y position of the slice
+ * @param type 1->top field, 2->bottom field, 3->frame
+ * @param offset offset into the AVFrame.data from which the slice should be read
+ */
+ void (*draw_horiz_band)(struct AVCodecContext *s,
+ const AVFrame *src, int offset[AV_NUM_DATA_POINTERS],
+ int y, int type, int height);
+
+ /**
+ * callback to negotiate the pixelFormat
+ * @param fmt is the list of formats which are supported by the codec,
+ * it is terminated by -1 as 0 is a valid format, the formats are ordered by quality.
+ * The first is always the native one.
+ * @note The callback may be called again immediately if initialization for
+ * the selected (hardware-accelerated) pixel format failed.
+ * @warning Behavior is undefined if the callback returns a value not
+ * in the fmt list of formats.
+ * @return the chosen format
+ * - encoding: unused
+ * - decoding: Set by user, if not set the native format will be chosen.
+ */
+ enum AVPixelFormat (*get_format)(struct AVCodecContext *s, const enum AVPixelFormat * fmt);
+
+ /**
+ * maximum number of B-frames between non-B-frames
+ * Note: The output will be delayed by max_b_frames+1 relative to the input.
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ int max_b_frames;
+
+ /**
+ * qscale factor between IP and B-frames
+ * If > 0 then the last P-frame quantizer will be used (q= lastp_q*factor+offset).
+ * If < 0 then normal ratecontrol will be done (q= -normal_q*factor+offset).
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ float b_quant_factor;
+
+#if FF_API_RC_STRATEGY
+ /** @deprecated use codec private option instead */
+ attribute_deprecated int rc_strategy;
+#define FF_RC_STRATEGY_XVID 1
+#endif
+
+#if FF_API_PRIVATE_OPT
+ /** @deprecated use encoder private options instead */
+ attribute_deprecated
+ int b_frame_strategy;
+#endif
+
+ /**
+ * qscale offset between IP and B-frames
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ float b_quant_offset;
+
+ /**
+ * Size of the frame reordering buffer in the decoder.
+ * For MPEG-2 it is 1 IPB or 0 low delay IP.
+ * - encoding: Set by libavcodec.
+ * - decoding: Set by libavcodec.
+ */
+ int has_b_frames;
+
+#if FF_API_PRIVATE_OPT
+ /** @deprecated use encoder private options instead */
+ attribute_deprecated
+ int mpeg_quant;
+#endif
+
+ /**
+ * qscale factor between P- and I-frames
+ * If > 0 then the last P-frame quantizer will be used (q = lastp_q * factor + offset).
+ * If < 0 then normal ratecontrol will be done (q= -normal_q*factor+offset).
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ float i_quant_factor;
+
+ /**
+ * qscale offset between P and I-frames
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ float i_quant_offset;
+
+ /**
+ * luminance masking (0-> disabled)
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ float lumi_masking;
+
+ /**
+ * temporary complexity masking (0-> disabled)
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ float temporal_cplx_masking;
+
+ /**
+ * spatial complexity masking (0-> disabled)
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ float spatial_cplx_masking;
+
+ /**
+ * p block masking (0-> disabled)
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ float p_masking;
+
+ /**
+ * darkness masking (0-> disabled)
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ float dark_masking;
+
+ /**
+ * slice count
+ * - encoding: Set by libavcodec.
+ * - decoding: Set by user (or 0).
+ */
+ int slice_count;
+
+#if FF_API_PRIVATE_OPT
+ /** @deprecated use encoder private options instead */
+ attribute_deprecated
+ int prediction_method;
+#define FF_PRED_LEFT 0
+#define FF_PRED_PLANE 1
+#define FF_PRED_MEDIAN 2
+#endif
+
+ /**
+ * slice offsets in the frame in bytes
+ * - encoding: Set/allocated by libavcodec.
+ * - decoding: Set/allocated by user (or NULL).
+ */
+ int *slice_offset;
+
+ /**
+ * sample aspect ratio (0 if unknown)
+ * That is the width of a pixel divided by the height of the pixel.
+ * Numerator and denominator must be relatively prime and smaller than 256 for some video standards.
+ * - encoding: Set by user.
+ * - decoding: Set by libavcodec.
+ */
+ AVRational sample_aspect_ratio;
+
+ /**
+ * motion estimation comparison function
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ int me_cmp;
+ /**
+ * subpixel motion estimation comparison function
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ int me_sub_cmp;
+ /**
+ * macroblock comparison function (not supported yet)
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ int mb_cmp;
+ /**
+ * interlaced DCT comparison function
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ int ildct_cmp;
+#define FF_CMP_SAD 0
+#define FF_CMP_SSE 1
+#define FF_CMP_SATD 2
+#define FF_CMP_DCT 3
+#define FF_CMP_PSNR 4
+#define FF_CMP_BIT 5
+#define FF_CMP_RD 6
+#define FF_CMP_ZERO 7
+#define FF_CMP_VSAD 8
+#define FF_CMP_VSSE 9
+#define FF_CMP_NSSE 10
+#define FF_CMP_W53 11
+#define FF_CMP_W97 12
+#define FF_CMP_DCTMAX 13
+#define FF_CMP_DCT264 14
+#define FF_CMP_CHROMA 256
+
+ /**
+ * ME diamond size & shape
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ int dia_size;
+
+ /**
+ * amount of previous MV predictors (2a+1 x 2a+1 square)
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ int last_predictor_count;
+
+#if FF_API_PRIVATE_OPT
+ /** @deprecated use encoder private options instead */
+ attribute_deprecated
+ int pre_me;
+#endif
+
+ /**
+ * motion estimation prepass comparison function
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ int me_pre_cmp;
+
+ /**
+ * ME prepass diamond size & shape
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ int pre_dia_size;
+
+ /**
+ * subpel ME quality
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ int me_subpel_quality;
+
+#if FF_API_AFD
+ /**
+ * DTG active format information (additional aspect ratio
+ * information only used in DVB MPEG-2 transport streams)
+ * 0 if not set.
+ *
+ * - encoding: unused
+ * - decoding: Set by decoder.
+ * @deprecated Deprecated in favor of AVSideData
+ */
+ attribute_deprecated int dtg_active_format;
+#define FF_DTG_AFD_SAME 8
+#define FF_DTG_AFD_4_3 9
+#define FF_DTG_AFD_16_9 10
+#define FF_DTG_AFD_14_9 11
+#define FF_DTG_AFD_4_3_SP_14_9 13
+#define FF_DTG_AFD_16_9_SP_14_9 14
+#define FF_DTG_AFD_SP_4_3 15
+#endif /* FF_API_AFD */
+
+ /**
+ * maximum motion estimation search range in subpel units
+ * If 0 then no limit.
+ *
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ int me_range;
+
+#if FF_API_QUANT_BIAS
+ /**
+ * @deprecated use encoder private option instead
+ */
+ attribute_deprecated int intra_quant_bias;
+#define FF_DEFAULT_QUANT_BIAS 999999
+
+ /**
+ * @deprecated use encoder private option instead
+ */
+ attribute_deprecated int inter_quant_bias;
+#endif
+
+ /**
+ * slice flags
+ * - encoding: unused
+ * - decoding: Set by user.
+ */
+ int slice_flags;
+#define SLICE_FLAG_CODED_ORDER 0x0001 ///< draw_horiz_band() is called in coded order instead of display
+#define SLICE_FLAG_ALLOW_FIELD 0x0002 ///< allow draw_horiz_band() with field slices (MPEG-2 field pics)
+#define SLICE_FLAG_ALLOW_PLANE 0x0004 ///< allow draw_horiz_band() with 1 component at a time (SVQ1)
+
+#if FF_API_XVMC
+ /**
+ * XVideo Motion Acceleration
+ * - encoding: forbidden
+ * - decoding: set by decoder
+ * @deprecated XvMC doesn't need it anymore.
+ */
+ attribute_deprecated int xvmc_acceleration;
+#endif /* FF_API_XVMC */
+
+ /**
+ * macroblock decision mode
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ int mb_decision;
+#define FF_MB_DECISION_SIMPLE 0 ///< uses mb_cmp
+#define FF_MB_DECISION_BITS 1 ///< chooses the one which needs the fewest bits
+#define FF_MB_DECISION_RD 2 ///< rate distortion
+
+ /**
+ * custom intra quantization matrix
+ * - encoding: Set by user, can be NULL.
+ * - decoding: Set by libavcodec.
+ */
+ uint16_t *intra_matrix;
+
+ /**
+ * custom inter quantization matrix
+ * - encoding: Set by user, can be NULL.
+ * - decoding: Set by libavcodec.
+ */
+ uint16_t *inter_matrix;
+
+#if FF_API_PRIVATE_OPT
+ /** @deprecated use encoder private options instead */
+ attribute_deprecated
+ int scenechange_threshold;
+
+ /** @deprecated use encoder private options instead */
+ attribute_deprecated
+ int noise_reduction;
+#endif
+
+#if FF_API_MPV_OPT
+ /**
+ * @deprecated this field is unused
+ */
+ attribute_deprecated
+ int me_threshold;
+
+ /**
+ * @deprecated this field is unused
+ */
+ attribute_deprecated
+ int mb_threshold;
+#endif
+
+ /**
+ * precision of the intra DC coefficient - 8
+ * - encoding: Set by user.
+ * - decoding: Set by libavcodec
+ */
+ int intra_dc_precision;
+
+ /**
+ * Number of macroblock rows at the top which are skipped.
+ * - encoding: unused
+ * - decoding: Set by user.
+ */
+ int skip_top;
+
+ /**
+ * Number of macroblock rows at the bottom which are skipped.
+ * - encoding: unused
+ * - decoding: Set by user.
+ */
+ int skip_bottom;
+
+#if FF_API_MPV_OPT
+ /**
+ * @deprecated use encoder private options instead
+ */
+ attribute_deprecated
+ float border_masking;
+#endif
+
+ /**
+ * minimum MB Lagrange multiplier
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ int mb_lmin;
+
+ /**
+ * maximum MB Lagrange multiplier
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ int mb_lmax;
+
+#if FF_API_PRIVATE_OPT
+ /**
+ * @deprecated use encoder private options instead
+ */
+ attribute_deprecated
+ int me_penalty_compensation;
+#endif
+
+ /**
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ int bidir_refine;
+
+#if FF_API_PRIVATE_OPT
+ /** @deprecated use encoder private options instead */
+ attribute_deprecated
+ int brd_scale;
+#endif
+
+ /**
+ * minimum GOP size
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ int keyint_min;
+
+ /**
+ * number of reference frames
+ * - encoding: Set by user.
+ * - decoding: Set by lavc.
+ */
+ int refs;
+
+#if FF_API_PRIVATE_OPT
+ /** @deprecated use encoder private options instead */
+ attribute_deprecated
+ int chromaoffset;
+#endif
+
+#if FF_API_UNUSED_MEMBERS
+ /**
+ * Multiplied by qscale for each frame and added to scene_change_score.
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ attribute_deprecated int scenechange_factor;
+#endif
+
+ /**
+ * Note: Value depends upon the compare function used for fullpel ME.
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ int mv0_threshold;
+
+#if FF_API_PRIVATE_OPT
+ /** @deprecated use encoder private options instead */
+ attribute_deprecated
+ int b_sensitivity;
+#endif
+
+ /**
+ * Chromaticity coordinates of the source primaries.
+ * - encoding: Set by user
+ * - decoding: Set by libavcodec
+ */
+ enum AVColorPrimaries color_primaries;
+
+ /**
+ * Color Transfer Characteristic.
+ * - encoding: Set by user
+ * - decoding: Set by libavcodec
+ */
+ enum AVColorTransferCharacteristic color_trc;
+
+ /**
+ * YUV colorspace type.
+ * - encoding: Set by user
+ * - decoding: Set by libavcodec
+ */
+ enum AVColorSpace colorspace;
+
+ /**
+ * MPEG vs JPEG YUV range.
+ * - encoding: Set by user
+ * - decoding: Set by libavcodec
+ */
+ enum AVColorRange color_range;
+
+ /**
+ * This defines the location of chroma samples.
+ * - encoding: Set by user
+ * - decoding: Set by libavcodec
+ */
+ enum AVChromaLocation chroma_sample_location;
+
+ /**
+ * Number of slices.
+ * Indicates number of picture subdivisions. Used for parallelized
+ * decoding.
+ * - encoding: Set by user
+ * - decoding: unused
+ */
+ int slices;
+
+ /** Field order
+ * - encoding: set by libavcodec
+ * - decoding: Set by user.
+ */
+ enum AVFieldOrder field_order;
+
+ /* audio only */
+ int sample_rate; ///< samples per second
+ int channels; ///< number of audio channels
+
+ /**
+ * audio sample format
+ * - encoding: Set by user.
+ * - decoding: Set by libavcodec.
+ */
+ enum AVSampleFormat sample_fmt; ///< sample format
+
+ /* The following data should not be initialized. */
+ /**
+ * Number of samples per channel in an audio frame.
+ *
+ * - encoding: set by libavcodec in avcodec_open2(). Each submitted frame
+ * except the last must contain exactly frame_size samples per channel.
+ * May be 0 when the codec has AV_CODEC_CAP_VARIABLE_FRAME_SIZE set, then the
+ * frame size is not restricted.
+ * - decoding: may be set by some decoders to indicate constant frame size
+ */
+ int frame_size;
+
+ /**
+ * Frame counter, set by libavcodec.
+ *
+ * - decoding: total number of frames returned from the decoder so far.
+ * - encoding: total number of frames passed to the encoder so far.
+ *
+ * @note the counter is not incremented if encoding/decoding resulted in
+ * an error.
+ */
+ int frame_number;
+
+ /**
+ * number of bytes per packet if constant and known or 0
+ * Used by some WAV based audio codecs.
+ */
+ int block_align;
+
+ /**
+ * Audio cutoff bandwidth (0 means "automatic")
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ int cutoff;
+
+ /**
+ * Audio channel layout.
+ * - encoding: set by user.
+ * - decoding: set by user, may be overwritten by libavcodec.
+ */
+ uint64_t channel_layout;
+
+ /**
+ * Request decoder to use this channel layout if it can (0 for default)
+ * - encoding: unused
+ * - decoding: Set by user.
+ */
+ uint64_t request_channel_layout;
+
+ /**
+ * Type of service that the audio stream conveys.
+ * - encoding: Set by user.
+ * - decoding: Set by libavcodec.
+ */
+ enum AVAudioServiceType audio_service_type;
+
+ /**
+ * desired sample format
+ * - encoding: Not used.
+ * - decoding: Set by user.
+ * Decoder will decode to this format if it can.
+ */
+ enum AVSampleFormat request_sample_fmt;
+
+ /**
+ * This callback is called at the beginning of each frame to get data
+ * buffer(s) for it. There may be one contiguous buffer for all the data or
+ * there may be a buffer per each data plane or anything in between. What
+ * this means is, you may set however many entries in buf[] you feel necessary.
+ * Each buffer must be reference-counted using the AVBuffer API (see description
+ * of buf[] below).
+ *
+ * The following fields will be set in the frame before this callback is
+ * called:
+ * - format
+ * - width, height (video only)
+ * - sample_rate, channel_layout, nb_samples (audio only)
+ * Their values may differ from the corresponding values in
+ * AVCodecContext. This callback must use the frame values, not the codec
+ * context values, to calculate the required buffer size.
+ *
+ * This callback must fill the following fields in the frame:
+ * - data[]
+ * - linesize[]
+ * - extended_data:
+ * * if the data is planar audio with more than 8 channels, then this
+ * callback must allocate and fill extended_data to contain all pointers
+ * to all data planes. data[] must hold as many pointers as it can.
+ * extended_data must be allocated with av_malloc() and will be freed in
+ * av_frame_unref().
+ * * otherwise extended_data must point to data
+ * - buf[] must contain one or more pointers to AVBufferRef structures. Each of
+ * the frame's data and extended_data pointers must be contained in these. That
+ * is, one AVBufferRef for each allocated chunk of memory, not necessarily one
+ * AVBufferRef per data[] entry. See: av_buffer_create(), av_buffer_alloc(),
+ * and av_buffer_ref().
+ * - extended_buf and nb_extended_buf must be allocated with av_malloc() by
+ * this callback and filled with the extra buffers if there are more
+ * buffers than buf[] can hold. extended_buf will be freed in
+ * av_frame_unref().
+ *
+ * If AV_CODEC_CAP_DR1 is not set then get_buffer2() must call
+ * avcodec_default_get_buffer2() instead of providing buffers allocated by
+ * some other means.
+ *
+ * Each data plane must be aligned to the maximum required by the target
+ * CPU.
+ *
+ * @see avcodec_default_get_buffer2()
+ *
+ * Video:
+ *
+ * If AV_GET_BUFFER_FLAG_REF is set in flags then the frame may be reused
+ * (read and/or written to if it is writable) later by libavcodec.
+ *
+ * avcodec_align_dimensions2() should be used to find the required width and
+ * height, as they normally need to be rounded up to the next multiple of 16.
+ *
+ * Some decoders do not support linesizes changing between frames.
+ *
+ * If frame multithreading is used and thread_safe_callbacks is set,
+ * this callback may be called from a different thread, but not from more
+ * than one at once. Does not need to be reentrant.
+ *
+ * @see avcodec_align_dimensions2()
+ *
+ * Audio:
+ *
+ * Decoders request a buffer of a particular size by setting
+ * AVFrame.nb_samples prior to calling get_buffer2(). The decoder may,
+ * however, utilize only part of the buffer by setting AVFrame.nb_samples
+ * to a smaller value in the output frame.
+ *
+ * As a convenience, av_samples_get_buffer_size() and
+ * av_samples_fill_arrays() in libavutil may be used by custom get_buffer2()
+ * functions to find the required data size and to fill data pointers and
+ * linesize. In AVFrame.linesize, only linesize[0] may be set for audio
+ * since all planes must be the same size.
+ *
+ * @see av_samples_get_buffer_size(), av_samples_fill_arrays()
+ *
+ * - encoding: unused
+ * - decoding: Set by libavcodec, user can override.
+ */
+ int (*get_buffer2)(struct AVCodecContext *s, AVFrame *frame, int flags);
+
+ /**
+ * If non-zero, the decoded audio and video frames returned from
+ * avcodec_decode_video2() and avcodec_decode_audio4() are reference-counted
+ * and are valid indefinitely. The caller must free them with
+ * av_frame_unref() when they are not needed anymore.
+ * Otherwise, the decoded frames must not be freed by the caller and are
+ * only valid until the next decode call.
+ *
+ * This is always automatically enabled if avcodec_receive_frame() is used.
+ *
+ * - encoding: unused
+ * - decoding: set by the caller before avcodec_open2().
+ */
+ int refcounted_frames;
+
+ /* - encoding parameters */
+ float qcompress; ///< amount of qscale change between easy & hard scenes (0.0-1.0)
+ float qblur; ///< amount of qscale smoothing over time (0.0-1.0)
+
+ /**
+ * minimum quantizer
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ int qmin;
+
+ /**
+ * maximum quantizer
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ int qmax;
+
+ /**
+ * maximum quantizer difference between frames
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ int max_qdiff;
+
+#if FF_API_MPV_OPT
+ /**
+ * @deprecated use encoder private options instead
+ */
+ attribute_deprecated
+ float rc_qsquish;
+
+ attribute_deprecated
+ float rc_qmod_amp;
+ attribute_deprecated
+ int rc_qmod_freq;
+#endif
+
+ /**
+ * decoder bitstream buffer size
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ int rc_buffer_size;
+
+ /**
+ * ratecontrol override, see RcOverride
+ * - encoding: Allocated/set/freed by user.
+ * - decoding: unused
+ */
+ int rc_override_count;
+ RcOverride *rc_override;
+
+#if FF_API_MPV_OPT
+ /**
+ * @deprecated use encoder private options instead
+ */
+ attribute_deprecated
+ const char *rc_eq;
+#endif
+
+ /**
+ * maximum bitrate
+ * - encoding: Set by user.
+ * - decoding: Set by user, may be overwritten by libavcodec.
+ */
+ int64_t rc_max_rate;
+
+ /**
+ * minimum bitrate
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ int64_t rc_min_rate;
+
+#if FF_API_MPV_OPT
+ /**
+ * @deprecated use encoder private options instead
+ */
+ attribute_deprecated
+ float rc_buffer_aggressivity;
+
+ attribute_deprecated
+ float rc_initial_cplx;
+#endif
+
+ /**
+ * Ratecontrol attempt to use, at maximum, <value> of what can be used without an underflow.
+ * - encoding: Set by user.
+ * - decoding: unused.
+ */
+ float rc_max_available_vbv_use;
+
+ /**
+ * Ratecontrol attempt to use, at least, <value> times the amount needed to prevent a vbv overflow.
+ * - encoding: Set by user.
+ * - decoding: unused.
+ */
+ float rc_min_vbv_overflow_use;
+
+ /**
+ * Number of bits which should be loaded into the rc buffer before decoding starts.
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ int rc_initial_buffer_occupancy;
+
+#if FF_API_CODER_TYPE
+#define FF_CODER_TYPE_VLC 0
+#define FF_CODER_TYPE_AC 1
+#define FF_CODER_TYPE_RAW 2
+#define FF_CODER_TYPE_RLE 3
+#if FF_API_UNUSED_MEMBERS
+#define FF_CODER_TYPE_DEFLATE 4
+#endif /* FF_API_UNUSED_MEMBERS */
+ /**
+ * @deprecated use encoder private options instead
+ */
+ attribute_deprecated
+ int coder_type;
+#endif /* FF_API_CODER_TYPE */
+
+#if FF_API_PRIVATE_OPT
+ /** @deprecated use encoder private options instead */
+ attribute_deprecated
+ int context_model;
+#endif
+
+#if FF_API_MPV_OPT
+ /**
+ * @deprecated use encoder private options instead
+ */
+ attribute_deprecated
+ int lmin;
+
+ /**
+ * @deprecated use encoder private options instead
+ */
+ attribute_deprecated
+ int lmax;
+#endif
+
+#if FF_API_PRIVATE_OPT
+ /** @deprecated use encoder private options instead */
+ attribute_deprecated
+ int frame_skip_threshold;
+
+ /** @deprecated use encoder private options instead */
+ attribute_deprecated
+ int frame_skip_factor;
+
+ /** @deprecated use encoder private options instead */
+ attribute_deprecated
+ int frame_skip_exp;
+
+ /** @deprecated use encoder private options instead */
+ attribute_deprecated
+ int frame_skip_cmp;
+#endif /* FF_API_PRIVATE_OPT */
+
+ /**
+ * trellis RD quantization
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ int trellis;
+
+#if FF_API_PRIVATE_OPT
+ /** @deprecated use encoder private options instead */
+ attribute_deprecated
+ int min_prediction_order;
+
+ /** @deprecated use encoder private options instead */
+ attribute_deprecated
+ int max_prediction_order;
+
+ /** @deprecated use encoder private options instead */
+ attribute_deprecated
+ int64_t timecode_frame_start;
+#endif
+
+#if FF_API_RTP_CALLBACK
+ /**
+ * @deprecated unused
+ */
+ /* The RTP callback: This function is called */
+ /* every time the encoder has a packet to send. */
+ /* It depends on the encoder if the data starts */
+ /* with a Start Code (it should). H.263 does. */
+ /* mb_nb contains the number of macroblocks */
+ /* encoded in the RTP payload. */
+ attribute_deprecated
+ void (*rtp_callback)(struct AVCodecContext *avctx, void *data, int size, int mb_nb);
+#endif
+
+#if FF_API_PRIVATE_OPT
+ /** @deprecated use encoder private options instead */
+ attribute_deprecated
+ int rtp_payload_size; /* The size of the RTP payload: the coder will */
+ /* do its best to deliver a chunk with size */
+ /* below rtp_payload_size, the chunk will start */
+ /* with a start code on some codecs like H.263. */
+ /* This doesn't take account of any particular */
+ /* headers inside the transmitted RTP payload. */
+#endif
+
+#if FF_API_STAT_BITS
+ /* statistics, used for 2-pass encoding */
+ attribute_deprecated
+ int mv_bits;
+ attribute_deprecated
+ int header_bits;
+ attribute_deprecated
+ int i_tex_bits;
+ attribute_deprecated
+ int p_tex_bits;
+ attribute_deprecated
+ int i_count;
+ attribute_deprecated
+ int p_count;
+ attribute_deprecated
+ int skip_count;
+ attribute_deprecated
+ int misc_bits;
+
+ /** @deprecated this field is unused */
+ attribute_deprecated
+ int frame_bits;
+#endif
+
+ /**
+ * pass1 encoding statistics output buffer
+ * - encoding: Set by libavcodec.
+ * - decoding: unused
+ */
+ char *stats_out;
+
+ /**
+ * pass2 encoding statistics input buffer
+ * Concatenated stuff from stats_out of pass1 should be placed here.
+ * - encoding: Allocated/set/freed by user.
+ * - decoding: unused
+ */
+ char *stats_in;
+
+ /**
+ * Work around bugs in encoders which sometimes cannot be detected automatically.
+ * - encoding: Set by user
+ * - decoding: Set by user
+ */
+ int workaround_bugs;
+#define FF_BUG_AUTODETECT 1 ///< autodetection
+#if FF_API_OLD_MSMPEG4
+#define FF_BUG_OLD_MSMPEG4 2
+#endif
+#define FF_BUG_XVID_ILACE 4
+#define FF_BUG_UMP4 8
+#define FF_BUG_NO_PADDING 16
+#define FF_BUG_AMV 32
+#if FF_API_AC_VLC
+#define FF_BUG_AC_VLC 0 ///< Will be removed, libavcodec can now handle these non-compliant files by default.
+#endif
+#define FF_BUG_QPEL_CHROMA 64
+#define FF_BUG_STD_QPEL 128
+#define FF_BUG_QPEL_CHROMA2 256
+#define FF_BUG_DIRECT_BLOCKSIZE 512
+#define FF_BUG_EDGE 1024
+#define FF_BUG_HPEL_CHROMA 2048
+#define FF_BUG_DC_CLIP 4096
+#define FF_BUG_MS 8192 ///< Work around various bugs in Microsoft's broken decoders.
+#define FF_BUG_TRUNCATED 16384
+
+ /**
+ * strictly follow the standard (MPEG-4, ...).
+ * - encoding: Set by user.
+ * - decoding: Set by user.
+ * Setting this to STRICT or higher means the encoder and decoder will
+ * generally do stupid things, whereas setting it to unofficial or lower
+ * will mean the encoder might produce output that is not supported by all
+ * spec-compliant decoders. Decoders don't differentiate between normal,
+ * unofficial and experimental (that is, they always try to decode things
+ * when they can) unless they are explicitly asked to behave stupidly
+ * (=strictly conform to the specs)
+ */
+ int strict_std_compliance;
+#define FF_COMPLIANCE_VERY_STRICT 2 ///< Strictly conform to an older more strict version of the spec or reference software.
+#define FF_COMPLIANCE_STRICT 1 ///< Strictly conform to all the things in the spec no matter what consequences.
+#define FF_COMPLIANCE_NORMAL 0
+#define FF_COMPLIANCE_UNOFFICIAL -1 ///< Allow unofficial extensions
+#define FF_COMPLIANCE_EXPERIMENTAL -2 ///< Allow nonstandardized experimental things.
+
+ /**
+ * error concealment flags
+ * - encoding: unused
+ * - decoding: Set by user.
+ */
+ int error_concealment;
+#define FF_EC_GUESS_MVS 1
+#define FF_EC_DEBLOCK 2
+#define FF_EC_FAVOR_INTER 256
+
+ /**
+ * debug
+ * - encoding: Set by user.
+ * - decoding: Set by user.
+ */
+ int debug;
+#define FF_DEBUG_PICT_INFO 1
+#define FF_DEBUG_RC 2
+#define FF_DEBUG_BITSTREAM 4
+#define FF_DEBUG_MB_TYPE 8
+#define FF_DEBUG_QP 16
+#if FF_API_DEBUG_MV
+/**
+ * @deprecated this option does nothing
+ */
+#define FF_DEBUG_MV 32
+#endif
+#define FF_DEBUG_DCT_COEFF 0x00000040
+#define FF_DEBUG_SKIP 0x00000080
+#define FF_DEBUG_STARTCODE 0x00000100
+#if FF_API_UNUSED_MEMBERS
+#define FF_DEBUG_PTS 0x00000200
+#endif /* FF_API_UNUSED_MEMBERS */
+#define FF_DEBUG_ER 0x00000400
+#define FF_DEBUG_MMCO 0x00000800
+#define FF_DEBUG_BUGS 0x00001000
+#if FF_API_DEBUG_MV
+#define FF_DEBUG_VIS_QP 0x00002000 ///< only access through AVOptions from outside libavcodec
+#define FF_DEBUG_VIS_MB_TYPE 0x00004000 ///< only access through AVOptions from outside libavcodec
+#endif
+#define FF_DEBUG_BUFFERS 0x00008000
+#define FF_DEBUG_THREADS 0x00010000
+#define FF_DEBUG_GREEN_MD 0x00800000
+#define FF_DEBUG_NOMC 0x01000000
+
+#if FF_API_DEBUG_MV
+ /**
+ * debug
+ * Code outside libavcodec should access this field using AVOptions
+ * - encoding: Set by user.
+ * - decoding: Set by user.
+ */
+ int debug_mv;
+#define FF_DEBUG_VIS_MV_P_FOR 0x00000001 // visualize forward predicted MVs of P-frames
+#define FF_DEBUG_VIS_MV_B_FOR 0x00000002 // visualize forward predicted MVs of B-frames
+#define FF_DEBUG_VIS_MV_B_BACK 0x00000004 // visualize backward predicted MVs of B-frames
+#endif
+
+ /**
+ * Error recognition; may misdetect some more or less valid parts as errors.
+ * - encoding: unused
+ * - decoding: Set by user.
+ */
+ int err_recognition;
+
+/**
+ * Verify checksums embedded in the bitstream (could be of either encoded or
+ * decoded data, depending on the codec) and print an error message on mismatch.
+ * If AV_EF_EXPLODE is also set, a mismatching checksum will result in the
+ * decoder returning an error.
+ */
+#define AV_EF_CRCCHECK (1<<0)
+#define AV_EF_BITSTREAM (1<<1) ///< detect bitstream specification deviations
+#define AV_EF_BUFFER (1<<2) ///< detect improper bitstream length
+#define AV_EF_EXPLODE (1<<3) ///< abort decoding on minor error detection
+
+#define AV_EF_IGNORE_ERR (1<<15) ///< ignore errors and continue
+#define AV_EF_CAREFUL (1<<16) ///< consider things that violate the spec, are fast to calculate and have not been seen in the wild as errors
+#define AV_EF_COMPLIANT (1<<17) ///< consider all spec non compliances as errors
+#define AV_EF_AGGRESSIVE (1<<18) ///< consider things that a sane encoder should not do as an error
+
+
+ /**
+ * opaque 64-bit number (generally a PTS) that will be reordered and
+ * output in AVFrame.reordered_opaque
+ * - encoding: unused
+ * - decoding: Set by user.
+ */
+ int64_t reordered_opaque;
+
+ /**
+ * Hardware accelerator in use
+ * - encoding: unused.
+ * - decoding: Set by libavcodec
+ */
+ struct AVHWAccel *hwaccel;
+
+ /**
+ * Hardware accelerator context.
+ * For some hardware accelerators, a global context needs to be
+ * provided by the user. In that case, this holds display-dependent
+ * data FFmpeg cannot instantiate itself. Please refer to the
+ * FFmpeg HW accelerator documentation to know how to fill this
+ * field, e.g. for VA API, this is a struct vaapi_context.
+ * - encoding: unused
+ * - decoding: Set by user
+ */
+ void *hwaccel_context;
+
+ /**
+ * error
+ * - encoding: Set by libavcodec if flags & AV_CODEC_FLAG_PSNR.
+ * - decoding: unused
+ */
+ uint64_t error[AV_NUM_DATA_POINTERS];
+
+ /**
+ * DCT algorithm, see FF_DCT_* below
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ int dct_algo;
+#define FF_DCT_AUTO 0
+#define FF_DCT_FASTINT 1
+#define FF_DCT_INT 2
+#define FF_DCT_MMX 3
+#define FF_DCT_ALTIVEC 5
+#define FF_DCT_FAAN 6
+
+ /**
+ * IDCT algorithm, see FF_IDCT_* below.
+ * - encoding: Set by user.
+ * - decoding: Set by user.
+ */
+ int idct_algo;
+#define FF_IDCT_AUTO 0
+#define FF_IDCT_INT 1
+#define FF_IDCT_SIMPLE 2
+#define FF_IDCT_SIMPLEMMX 3
+#define FF_IDCT_ARM 7
+#define FF_IDCT_ALTIVEC 8
+#if FF_API_ARCH_SH4
+#define FF_IDCT_SH4 9
+#endif
+#define FF_IDCT_SIMPLEARM 10
+#if FF_API_UNUSED_MEMBERS
+#define FF_IDCT_IPP 13
+#endif /* FF_API_UNUSED_MEMBERS */
+#define FF_IDCT_XVID 14
+#if FF_API_IDCT_XVIDMMX
+#define FF_IDCT_XVIDMMX 14
+#endif /* FF_API_IDCT_XVIDMMX */
+#define FF_IDCT_SIMPLEARMV5TE 16
+#define FF_IDCT_SIMPLEARMV6 17
+#if FF_API_ARCH_SPARC
+#define FF_IDCT_SIMPLEVIS 18
+#endif
+#define FF_IDCT_FAAN 20
+#define FF_IDCT_SIMPLENEON 22
+#if FF_API_ARCH_ALPHA
+#define FF_IDCT_SIMPLEALPHA 23
+#endif
+#define FF_IDCT_SIMPLEAUTO 128
+
+ /**
+ * bits per sample/pixel from the demuxer (needed for huffyuv).
+ * - encoding: Set by libavcodec.
+ * - decoding: Set by user.
+ */
+ int bits_per_coded_sample;
+
+ /**
+ * Bits per sample/pixel of internal libavcodec pixel/sample format.
+ * - encoding: set by user.
+ * - decoding: set by libavcodec.
+ */
+ int bits_per_raw_sample;
+
+#if FF_API_LOWRES
+ /**
+ * low resolution decoding, 1-> 1/2 size, 2->1/4 size
+ * - encoding: unused
+ * - decoding: Set by user.
+ * Code outside libavcodec should access this field using:
+ * av_codec_{get,set}_lowres(avctx)
+ */
+ int lowres;
+#endif
+
+#if FF_API_CODED_FRAME
+ /**
+ * the picture in the bitstream
+ * - encoding: Set by libavcodec.
+ * - decoding: unused
+ *
+ * @deprecated use the quality factor packet side data instead
+ */
+ attribute_deprecated AVFrame *coded_frame;
+#endif
+
+ /**
+ * thread count
+ * is used to decide how many independent tasks should be passed to execute()
+ * - encoding: Set by user.
+ * - decoding: Set by user.
+ */
+ int thread_count;
+
+ /**
+ * Which multithreading methods to use.
+ * Use of FF_THREAD_FRAME will increase decoding delay by one frame per thread,
+ * so clients which cannot provide future frames should not use it.
+ *
+ * - encoding: Set by user, otherwise the default is used.
+ * - decoding: Set by user, otherwise the default is used.
+ */
+ int thread_type;
+#define FF_THREAD_FRAME 1 ///< Decode more than one frame at once
+#define FF_THREAD_SLICE 2 ///< Decode more than one part of a single frame at once
+
+ /**
+ * Which multithreading methods are in use by the codec.
+ * - encoding: Set by libavcodec.
+ * - decoding: Set by libavcodec.
+ */
+ int active_thread_type;
+
+ /**
+ * Set by the client if its custom get_buffer() callback can be called
+ * synchronously from another thread, which allows faster multithreaded decoding.
+ * draw_horiz_band() will be called from other threads regardless of this setting.
+ * Ignored if the default get_buffer() is used.
+ * - encoding: Set by user.
+ * - decoding: Set by user.
+ */
+ int thread_safe_callbacks;
+
+ /**
+ * The codec may call this to execute several independent things.
+ * It will return only after finishing all tasks.
+ * The user may replace this with some multithreaded implementation,
+ * the default implementation will execute the parts serially.
+ * @param count the number of things to execute
+ * - encoding: Set by libavcodec, user can override.
+ * - decoding: Set by libavcodec, user can override.
+ */
+ int (*execute)(struct AVCodecContext *c, int (*func)(struct AVCodecContext *c2, void *arg), void *arg2, int *ret, int count, int size);
+
+ /**
+ * The codec may call this to execute several independent things.
+ * It will return only after finishing all tasks.
+ * The user may replace this with some multithreaded implementation,
+ * the default implementation will execute the parts serially.
+ * Also see avcodec_thread_init and e.g. the --enable-pthread configure option.
+ * @param c context passed also to func
+ * @param count the number of things to execute
+ * @param arg2 argument passed unchanged to func
+ * @param ret return values of executed functions, must have space for "count" values. May be NULL.
+ * @param func function that will be called count times, with jobnr from 0 to count-1.
+ * threadnr will be in the range 0 to c->thread_count-1 < MAX_THREADS and so that no
+ * two instances of func executing at the same time will have the same threadnr.
+ * @return always 0 currently, but code should handle a future improvement where when any call to func
+ * returns < 0 no further calls to func may be done and < 0 is returned.
+ * - encoding: Set by libavcodec, user can override.
+ * - decoding: Set by libavcodec, user can override.
+ */
+ int (*execute2)(struct AVCodecContext *c, int (*func)(struct AVCodecContext *c2, void *arg, int jobnr, int threadnr), void *arg2, int *ret, int count);
+
+ /**
+ * noise vs. sse weight for the nsse comparison function
+ * - encoding: Set by user.
+ * - decoding: unused
+ */
+ int nsse_weight;
+
+ /**
+ * profile
+ * - encoding: Set by user.
+ * - decoding: Set by libavcodec.
+ */
+ int profile;
+#define FF_PROFILE_UNKNOWN -99
+#define FF_PROFILE_RESERVED -100
+
+#define FF_PROFILE_AAC_MAIN 0
+#define FF_PROFILE_AAC_LOW 1
+#define FF_PROFILE_AAC_SSR 2
+#define FF_PROFILE_AAC_LTP 3
+#define FF_PROFILE_AAC_HE 4
+#define FF_PROFILE_AAC_HE_V2 28
+#define FF_PROFILE_AAC_LD 22
+#define FF_PROFILE_AAC_ELD 38
+#define FF_PROFILE_MPEG2_AAC_LOW 128
+#define FF_PROFILE_MPEG2_AAC_HE 131
+
+#define FF_PROFILE_DTS 20
+#define FF_PROFILE_DTS_ES 30
+#define FF_PROFILE_DTS_96_24 40
+#define FF_PROFILE_DTS_HD_HRA 50
+#define FF_PROFILE_DTS_HD_MA 60
+#define FF_PROFILE_DTS_EXPRESS 70
+
+#define FF_PROFILE_MPEG2_422 0
+#define FF_PROFILE_MPEG2_HIGH 1
+#define FF_PROFILE_MPEG2_SS 2
+#define FF_PROFILE_MPEG2_SNR_SCALABLE 3
+#define FF_PROFILE_MPEG2_MAIN 4
+#define FF_PROFILE_MPEG2_SIMPLE 5
+
+#define FF_PROFILE_H264_CONSTRAINED (1<<9) // 8+1; constraint_set1_flag
+#define FF_PROFILE_H264_INTRA (1<<11) // 8+3; constraint_set3_flag
+
+#define FF_PROFILE_H264_BASELINE 66
+#define FF_PROFILE_H264_CONSTRAINED_BASELINE (66|FF_PROFILE_H264_CONSTRAINED)
+#define FF_PROFILE_H264_MAIN 77
+#define FF_PROFILE_H264_EXTENDED 88
+#define FF_PROFILE_H264_HIGH 100
+#define FF_PROFILE_H264_HIGH_10 110
+#define FF_PROFILE_H264_HIGH_10_INTRA (110|FF_PROFILE_H264_INTRA)
+#define FF_PROFILE_H264_HIGH_422 122
+#define FF_PROFILE_H264_HIGH_422_INTRA (122|FF_PROFILE_H264_INTRA)
+#define FF_PROFILE_H264_HIGH_444 144
+#define FF_PROFILE_H264_HIGH_444_PREDICTIVE 244
+#define FF_PROFILE_H264_HIGH_444_INTRA (244|FF_PROFILE_H264_INTRA)
+#define FF_PROFILE_H264_CAVLC_444 44
+
+#define FF_PROFILE_VC1_SIMPLE 0
+#define FF_PROFILE_VC1_MAIN 1
+#define FF_PROFILE_VC1_COMPLEX 2
+#define FF_PROFILE_VC1_ADVANCED 3
+
+#define FF_PROFILE_MPEG4_SIMPLE 0
+#define FF_PROFILE_MPEG4_SIMPLE_SCALABLE 1
+#define FF_PROFILE_MPEG4_CORE 2
+#define FF_PROFILE_MPEG4_MAIN 3
+#define FF_PROFILE_MPEG4_N_BIT 4
+#define FF_PROFILE_MPEG4_SCALABLE_TEXTURE 5
+#define FF_PROFILE_MPEG4_SIMPLE_FACE_ANIMATION 6
+#define FF_PROFILE_MPEG4_BASIC_ANIMATED_TEXTURE 7
+#define FF_PROFILE_MPEG4_HYBRID 8
+#define FF_PROFILE_MPEG4_ADVANCED_REAL_TIME 9
+#define FF_PROFILE_MPEG4_CORE_SCALABLE 10
+#define FF_PROFILE_MPEG4_ADVANCED_CODING 11
+#define FF_PROFILE_MPEG4_ADVANCED_CORE 12
+#define FF_PROFILE_MPEG4_ADVANCED_SCALABLE_TEXTURE 13
+#define FF_PROFILE_MPEG4_SIMPLE_STUDIO 14
+#define FF_PROFILE_MPEG4_ADVANCED_SIMPLE 15
+
+#define FF_PROFILE_JPEG2000_CSTREAM_RESTRICTION_0 1
+#define FF_PROFILE_JPEG2000_CSTREAM_RESTRICTION_1 2
+#define FF_PROFILE_JPEG2000_CSTREAM_NO_RESTRICTION 32768
+#define FF_PROFILE_JPEG2000_DCINEMA_2K 3
+#define FF_PROFILE_JPEG2000_DCINEMA_4K 4
+
+#define FF_PROFILE_VP9_0 0
+#define FF_PROFILE_VP9_1 1
+#define FF_PROFILE_VP9_2 2
+#define FF_PROFILE_VP9_3 3
+
+#define FF_PROFILE_HEVC_MAIN 1
+#define FF_PROFILE_HEVC_MAIN_10 2
+#define FF_PROFILE_HEVC_MAIN_STILL_PICTURE 3
+#define FF_PROFILE_HEVC_REXT 4
+
+ /**
+ * level
+ * - encoding: Set by user.
+ * - decoding: Set by libavcodec.
+ */
+ int level;
+#define FF_LEVEL_UNKNOWN -99
+
+ /**
+ * Skip loop filtering for selected frames.
+ * - encoding: unused
+ * - decoding: Set by user.
+ */
+ enum AVDiscard skip_loop_filter;
+
+ /**
+ * Skip IDCT/dequantization for selected frames.
+ * - encoding: unused
+ * - decoding: Set by user.
+ */
+ enum AVDiscard skip_idct;
+
+ /**
+ * Skip decoding for selected frames.
+ * - encoding: unused
+ * - decoding: Set by user.
+ */
+ enum AVDiscard skip_frame;
+
+ /**
+ * Header containing style information for text subtitles.
+ * For SUBTITLE_ASS subtitle type, it should contain the whole ASS
+ * [Script Info] and [V4+ Styles] section, plus the [Events] line and
+ * the Format line following. It shouldn't include any Dialogue line.
+ * - encoding: Set/allocated/freed by user (before avcodec_open2())
+ * - decoding: Set/allocated/freed by libavcodec (by avcodec_open2())
+ */
+ uint8_t *subtitle_header;
+ int subtitle_header_size;
+
+#if FF_API_ERROR_RATE
+ /**
+ * @deprecated use the 'error_rate' private AVOption of the mpegvideo
+ * encoders
+ */
+ attribute_deprecated
+ int error_rate;
+#endif
+
+#if FF_API_VBV_DELAY
+ /**
+ * VBV delay coded in the last frame (in periods of a 27 MHz clock).
+ * Used for compliant TS muxing.
+ * - encoding: Set by libavcodec.
+ * - decoding: unused.
+ * @deprecated this value is now exported as a part of
+ * AV_PKT_DATA_CPB_PROPERTIES packet side data
+ */
+ attribute_deprecated
+ uint64_t vbv_delay;
+#endif
+
+#if FF_API_SIDEDATA_ONLY_PKT
+ /**
+ * Encoding only and set by default. Allow encoders to output packets
+ * that do not contain any encoded data, only side data.
+ *
+ * Some encoders need to output such packets, e.g. to update some stream
+ * parameters at the end of encoding.
+ *
+ * @deprecated this field disables the default behaviour and
+ * it is kept only for compatibility.
+ */
+ attribute_deprecated
+ int side_data_only_packets;
+#endif
+
+ /**
+ * Audio only. The number of "priming" samples (padding) inserted by the
+ * encoder at the beginning of the audio. I.e. this number of leading
+ * decoded samples must be discarded by the caller to get the original audio
+ * without leading padding.
+ *
+ * - decoding: unused
+ * - encoding: Set by libavcodec. The timestamps on the output packets are
+ * adjusted by the encoder so that they always refer to the
+ * first sample of the data actually contained in the packet,
+ * including any added padding. E.g. if the timebase is
+ * 1/samplerate and the timestamp of the first input sample is
+ * 0, the timestamp of the first output packet will be
+ * -initial_padding.
+ */
+ int initial_padding;
+
+ /**
+ * - decoding: For codecs that store a framerate value in the compressed
+ * bitstream, the decoder may export it here. { 0, 1} when
+ * unknown.
+ * - encoding: May be used to signal the framerate of CFR content to an
+ * encoder.
+ */
+ AVRational framerate;
+
+ /**
+ * Nominal unaccelerated pixel format, see AV_PIX_FMT_xxx.
+ * - encoding: unused.
+ * - decoding: Set by libavcodec before calling get_format()
+ */
+ enum AVPixelFormat sw_pix_fmt;
+
+ /**
+ * Timebase in which pkt_dts/pts and AVPacket.dts/pts are.
+ * Code outside libavcodec should access this field using:
+ * av_codec_{get,set}_pkt_timebase(avctx)
+ * - encoding unused.
+ * - decoding set by user.
+ */
+ AVRational pkt_timebase;
+
+ /**
+ * AVCodecDescriptor
+ * Code outside libavcodec should access this field using:
+ * av_codec_{get,set}_codec_descriptor(avctx)
+ * - encoding: unused.
+ * - decoding: set by libavcodec.
+ */
+ const AVCodecDescriptor *codec_descriptor;
+
+#if !FF_API_LOWRES
+ /**
+ * low resolution decoding, 1-> 1/2 size, 2->1/4 size
+ * - encoding: unused
+ * - decoding: Set by user.
+ * Code outside libavcodec should access this field using:
+ * av_codec_{get,set}_lowres(avctx)
+ */
+ int lowres;
+#endif
+
+ /**
+ * Current statistics for PTS correction.
+ * - decoding: maintained and used by libavcodec, not intended to be used by user apps
+ * - encoding: unused
+ */
+ int64_t pts_correction_num_faulty_pts; /// Number of incorrect PTS values so far
+ int64_t pts_correction_num_faulty_dts; /// Number of incorrect DTS values so far
+ int64_t pts_correction_last_pts; /// PTS of the last frame
+ int64_t pts_correction_last_dts; /// DTS of the last frame
+
+ /**
+ * Character encoding of the input subtitles file.
+ * - decoding: set by user
+ * - encoding: unused
+ */
+ char *sub_charenc;
+
+ /**
+ * Subtitles character encoding mode. Formats or codecs might be adjusting
+ * this setting (if they are doing the conversion themselves for instance).
+ * - decoding: set by libavcodec
+ * - encoding: unused
+ */
+ int sub_charenc_mode;
+#define FF_SUB_CHARENC_MODE_DO_NOTHING -1 ///< do nothing (demuxer outputs a stream supposed to be already in UTF-8, or the codec is bitmap for instance)
+#define FF_SUB_CHARENC_MODE_AUTOMATIC 0 ///< libavcodec will select the mode itself
+#define FF_SUB_CHARENC_MODE_PRE_DECODER 1 ///< the AVPacket data needs to be recoded to UTF-8 before being fed to the decoder, requires iconv
+
+ /**
+ * Skip processing alpha if supported by codec.
+ * Note that if the format uses pre-multiplied alpha (common with VP6,
+ * and recommended due to better video quality/compression)
+ * the image will look as if alpha-blended onto a black background.
+ * However for formats that do not use pre-multiplied alpha
+ * there might be serious artefacts (though e.g. libswscale currently
+ * assumes pre-multiplied alpha anyway).
+ * Code outside libavcodec should access this field using AVOptions
+ *
+ * - decoding: set by user
+ * - encoding: unused
+ */
+ int skip_alpha;
+
+ /**
+ * Number of samples to skip after a discontinuity
+ * - decoding: unused
+ * - encoding: set by libavcodec
+ */
+ int seek_preroll;
+
+#if !FF_API_DEBUG_MV
+ /**
+ * debug motion vectors
+ * Code outside libavcodec should access this field using AVOptions
+ * - encoding: Set by user.
+ * - decoding: Set by user.
+ */
+ int debug_mv;
+#define FF_DEBUG_VIS_MV_P_FOR 0x00000001 //visualize forward predicted MVs of P frames
+#define FF_DEBUG_VIS_MV_B_FOR 0x00000002 //visualize forward predicted MVs of B frames
+#define FF_DEBUG_VIS_MV_B_BACK 0x00000004 //visualize backward predicted MVs of B frames
+#endif
+
+ /**
+ * custom intra quantization matrix
+ * Code outside libavcodec should access this field using av_codec_g/set_chroma_intra_matrix()
+ * - encoding: Set by user, can be NULL.
+ * - decoding: unused.
+ */
+ uint16_t *chroma_intra_matrix;
+
+ /**
+ * dump format separator.
+ * can be ", " or "\n " or anything else
+ * Code outside libavcodec should access this field using AVOptions
+ * (NO direct access).
+ * - encoding: Set by user.
+ * - decoding: Set by user.
+ */
+ uint8_t *dump_separator;
+
+ /**
+ * ',' separated list of allowed decoders.
+ * If NULL then all are allowed
+ * - encoding: unused
+     * - decoding: set by user through AVOptions (NO direct access)
+ */
+ char *codec_whitelist;
+
+    /**
+ * Properties of the stream that gets decoded
+ * To be accessed through av_codec_get_properties() (NO direct access)
+ * - encoding: unused
+ * - decoding: set by libavcodec
+ */
+ unsigned properties;
+#define FF_CODEC_PROPERTY_LOSSLESS 0x00000001
+#define FF_CODEC_PROPERTY_CLOSED_CAPTIONS 0x00000002
+
+ /**
+ * Additional data associated with the entire coded stream.
+ *
+ * - decoding: unused
+ * - encoding: may be set by libavcodec after avcodec_open2().
+ */
+ AVPacketSideData *coded_side_data;
+ int nb_coded_side_data;
+
+ /**
+ * Encoding only.
+ *
+ * For hardware encoders configured to use a hwaccel pixel format, this
+ * field should be set by the caller to a reference to the AVHWFramesContext
+ * describing input frames. AVHWFramesContext.format must be equal to
+ * AVCodecContext.pix_fmt.
+ *
+ * This field should be set before avcodec_open2() is called and is
+ * afterwards owned and managed by libavcodec.
+ */
+ AVBufferRef *hw_frames_ctx;
+
+ /**
+ * Control the form of AVSubtitle.rects[N]->ass
+ * - decoding: set by user
+ * - encoding: unused
+ */
+ int sub_text_format;
+#define FF_SUB_TEXT_FMT_ASS 0
+#if FF_API_ASS_TIMING
+#define FF_SUB_TEXT_FMT_ASS_WITH_TIMINGS 1
+#endif
+
+} AVCodecContext;
+
+AVRational av_codec_get_pkt_timebase (const AVCodecContext *avctx);
+void av_codec_set_pkt_timebase (AVCodecContext *avctx, AVRational val);
+
+const AVCodecDescriptor *av_codec_get_codec_descriptor(const AVCodecContext *avctx);
+void av_codec_set_codec_descriptor(AVCodecContext *avctx, const AVCodecDescriptor *desc);
+
+unsigned av_codec_get_codec_properties(const AVCodecContext *avctx);
+
+int av_codec_get_lowres(const AVCodecContext *avctx);
+void av_codec_set_lowres(AVCodecContext *avctx, int val);
+
+int av_codec_get_seek_preroll(const AVCodecContext *avctx);
+void av_codec_set_seek_preroll(AVCodecContext *avctx, int val);
+
+uint16_t *av_codec_get_chroma_intra_matrix(const AVCodecContext *avctx);
+void av_codec_set_chroma_intra_matrix(AVCodecContext *avctx, uint16_t *val);
+
+/**
+ * AVProfile.
+ * Maps a profile value to a human-readable name; AVCodec.profiles is an
+ * array of these terminated by {FF_PROFILE_UNKNOWN}.
+ */
+typedef struct AVProfile {
+    int profile;      ///< profile value, one of the FF_PROFILE_* constants
+    const char *name; ///< short name for the profile
+} AVProfile;
+
+typedef struct AVCodecDefault AVCodecDefault; ///< opaque; private codec-specific defaults (see AVCodec.defaults)
+
+struct AVSubtitle; ///< forward declaration; the full definition appears later in this header
+
+/**
+ * AVCodec.
+ */
+typedef struct AVCodec {
+    /**
+     * Name of the codec implementation.
+     * The name is globally unique among encoders and among decoders (but an
+     * encoder and a decoder can share the same name).
+     * This is the primary way to find a codec from the user perspective.
+     */
+    const char *name;
+    /**
+     * Descriptive name for the codec, meant to be more human readable than name.
+     * You should use the NULL_IF_CONFIG_SMALL() macro to define it.
+     */
+    const char *long_name;
+    enum AVMediaType type; ///< media type handled by this codec, see AVMEDIA_TYPE_xxx
+    enum AVCodecID id;     ///< codec id, see AV_CODEC_ID_xxx
+    /**
+     * Codec capabilities.
+     * see AV_CODEC_CAP_*
+     */
+    int capabilities;
+    const AVRational *supported_framerates; ///< array of supported framerates, or NULL if any, array is terminated by {0,0}
+    const enum AVPixelFormat *pix_fmts;     ///< array of supported pixel formats, or NULL if unknown, array is terminated by -1
+    const int *supported_samplerates;       ///< array of supported audio samplerates, or NULL if unknown, array is terminated by 0
+    const enum AVSampleFormat *sample_fmts; ///< array of supported sample formats, or NULL if unknown, array is terminated by -1
+    const uint64_t *channel_layouts;        ///< array of supported channel layouts, or NULL if unknown. array is terminated by 0
+    uint8_t max_lowres;                     ///< maximum value for lowres supported by the decoder, no direct access, use av_codec_get_max_lowres()
+    const AVClass *priv_class;              ///< AVClass for the private context
+    const AVProfile *profiles;              ///< array of recognized profiles, or NULL if unknown, array is terminated by {FF_PROFILE_UNKNOWN}
+
+    /*****************************************************************
+     * No fields below this line are part of the public API. They
+     * may not be used outside of libavcodec and can be changed and
+     * removed at will.
+     * New public fields should be added right above.
+     *****************************************************************
+     */
+    int priv_data_size;
+    struct AVCodec *next; ///< next registered codec; traversed by av_codec_next()
+    /**
+     * @name Frame-level threading support functions
+     * @{
+     */
+    /**
+     * If defined, called on thread contexts when they are created.
+     * If the codec allocates writable tables in init(), re-allocate them here.
+     * priv_data will be set to a copy of the original.
+     */
+    int (*init_thread_copy)(AVCodecContext *);
+    /**
+     * Copy necessary context variables from a previous thread context to the current one.
+     * If not defined, the next thread will start automatically; otherwise, the codec
+     * must call ff_thread_finish_setup().
+     *
+     * dst and src will (rarely) point to the same context, in which case memcpy should be skipped.
+     */
+    int (*update_thread_context)(AVCodecContext *dst, const AVCodecContext *src);
+    /** @} */
+
+    /**
+     * Private codec-specific defaults.
+     */
+    const AVCodecDefault *defaults;
+
+    /**
+     * Initialize codec static data, called from avcodec_register().
+     */
+    void (*init_static_data)(struct AVCodec *codec);
+
+    int (*init)(AVCodecContext *);
+    int (*encode_sub)(AVCodecContext *, uint8_t *buf, int buf_size,
+                      const struct AVSubtitle *sub);
+    /**
+     * Encode data to an AVPacket.
+     *
+     * @param      avctx          codec context
+     * @param      avpkt          output AVPacket (may contain a user-provided buffer)
+     * @param[in]  frame          AVFrame containing the raw data to be encoded
+     * @param[out] got_packet_ptr encoder sets to 0 or 1 to indicate that a
+     *                            non-empty packet was returned in avpkt.
+     * @return 0 on success, negative error code on failure
+     */
+    int (*encode2)(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame,
+                   int *got_packet_ptr);
+    int (*decode)(AVCodecContext *, void *outdata, int *outdata_size, AVPacket *avpkt);
+    int (*close)(AVCodecContext *);
+    /**
+     * Decode/encode API with decoupled packet/frame dataflow. The API is the
+     * same as the avcodec_ prefixed APIs (avcodec_send_frame() etc.), except
+     * that:
+     * - never called if the codec is closed or the wrong type,
+     * - AVPacket parameter change side data is applied right before calling
+     *   AVCodec->send_packet,
+     * - if AV_CODEC_CAP_DELAY is not set, drain packets or frames are never sent,
+     * - only one drain packet is ever passed down (until the next flush()),
+     * - a drain AVPacket is always NULL (no need to check for avpkt->size).
+     */
+    int (*send_frame)(AVCodecContext *avctx, const AVFrame *frame);
+    int (*send_packet)(AVCodecContext *avctx, const AVPacket *avpkt);
+    int (*receive_frame)(AVCodecContext *avctx, AVFrame *frame);
+    int (*receive_packet)(AVCodecContext *avctx, AVPacket *avpkt);
+    /**
+     * Flush buffers.
+     * Will be called when seeking
+     */
+    void (*flush)(AVCodecContext *);
+    /**
+     * Internal codec capabilities.
+     * See FF_CODEC_CAP_* in internal.h
+     */
+    int caps_internal;
+} AVCodec;
+
+int av_codec_get_max_lowres(const AVCodec *codec);
+
+struct MpegEncContext;
+
+/**
+ * @defgroup lavc_hwaccel AVHWAccel
+ * @{
+ */
+typedef struct AVHWAccel {
+    /**
+     * Name of the hardware accelerated codec.
+     * The name is globally unique among encoders and among decoders (but an
+     * encoder and a decoder can share the same name).
+     */
+    const char *name;
+
+    /**
+     * Type of codec implemented by the hardware accelerator.
+     *
+     * See AVMEDIA_TYPE_xxx
+     */
+    enum AVMediaType type;
+
+    /**
+     * Codec implemented by the hardware accelerator.
+     *
+     * See AV_CODEC_ID_xxx
+     */
+    enum AVCodecID id;
+
+    /**
+     * Supported pixel format.
+     *
+     * Only hardware accelerated formats are supported here.
+     */
+    enum AVPixelFormat pix_fmt;
+
+    /**
+     * Hardware accelerated codec capabilities.
+     * see HWACCEL_CODEC_CAP_*
+     */
+    int capabilities;
+
+    /*****************************************************************
+     * No fields below this line are part of the public API. They
+     * may not be used outside of libavcodec and can be changed and
+     * removed at will.
+     * New public fields should be added right above.
+     *****************************************************************
+     */
+    struct AVHWAccel *next; ///< next registered hwaccel — NOTE(review): internal list link, not for users
+
+    /**
+     * Allocate a custom buffer
+     */
+    int (*alloc_frame)(AVCodecContext *avctx, AVFrame *frame);
+
+    /**
+     * Called at the beginning of each frame or field picture.
+     *
+     * Meaningful frame information (codec specific) is guaranteed to
+     * be parsed at this point. This function is mandatory.
+     *
+     * Note that buf can be NULL along with buf_size set to 0.
+     * Otherwise, this means the whole frame is available at this point.
+     *
+     * @param avctx the codec context
+     * @param buf the frame data buffer base
+     * @param buf_size the size of the frame in bytes
+     * @return zero if successful, a negative value otherwise
+     */
+    int (*start_frame)(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size);
+
+    /**
+     * Callback for each slice.
+     *
+     * Meaningful slice information (codec specific) is guaranteed to
+     * be parsed at this point. This function is mandatory.
+     * The only exception is XvMC, which works on the MB level.
+     *
+     * @param avctx the codec context
+     * @param buf the slice data buffer base
+     * @param buf_size the size of the slice in bytes
+     * @return zero if successful, a negative value otherwise
+     */
+    int (*decode_slice)(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size);
+
+    /**
+     * Called at the end of each frame or field picture.
+     *
+     * The whole picture is parsed at this point and can now be sent
+     * to the hardware accelerator. This function is mandatory.
+     *
+     * @param avctx the codec context
+     * @return zero if successful, a negative value otherwise
+     */
+    int (*end_frame)(AVCodecContext *avctx);
+
+    /**
+     * Size of per-frame hardware accelerator private data.
+     *
+     * Private data is allocated with av_mallocz() before
+     * AVCodecContext.get_buffer() and deallocated after
+     * AVCodecContext.release_buffer().
+     */
+    int frame_priv_data_size;
+
+    /**
+     * Called for every Macroblock in a slice.
+     *
+     * XvMC uses it to replace the ff_mpv_decode_mb().
+     * Instead of decoding to raw picture, MB parameters are
+     * stored in an array provided by the video driver.
+     *
+     * @param s the mpeg context
+     */
+    void (*decode_mb)(struct MpegEncContext *s);
+
+    /**
+     * Initialize the hwaccel private data.
+     *
+     * This will be called from ff_get_format(), after hwaccel and
+     * hwaccel_context are set and the hwaccel private data in AVCodecInternal
+     * is allocated.
+     */
+    int (*init)(AVCodecContext *avctx);
+
+    /**
+     * Uninitialize the hwaccel private data.
+     *
+     * This will be called from get_format() or avcodec_close(), after hwaccel
+     * and hwaccel_context are already uninitialized.
+     */
+    int (*uninit)(AVCodecContext *avctx);
+
+    /**
+     * Size of the private data to allocate in
+     * AVCodecInternal.hwaccel_priv_data.
+     */
+    int priv_data_size;
+} AVHWAccel;
+
+/**
+ * Hardware acceleration should be used for decoding even if the codec level
+ * used is unknown or higher than the maximum supported level reported by the
+ * hardware driver.
+ *
+ * It's generally a good idea to pass this flag unless you have a specific
+ * reason not to, as hardware tends to under-report supported levels.
+ */
+#define AV_HWACCEL_FLAG_IGNORE_LEVEL (1 << 0)
+
+/**
+ * Hardware acceleration can output YUV pixel formats with a different chroma
+ * sampling than 4:2:0 and/or other than 8 bits per component.
+ */
+#define AV_HWACCEL_FLAG_ALLOW_HIGH_DEPTH (1 << 1)
+
+/**
+ * @}
+ */
+
+#if FF_API_AVPICTURE
+/**
+ * @defgroup lavc_picture AVPicture
+ *
+ * Functions for working with AVPicture
+ * @{
+ */
+
+/**
+ * Picture data structure.
+ *
+ * Up to four components can be stored into it, the last component is
+ * alpha.
+ * @deprecated use AVFrame or imgutils functions instead
+ */
+typedef struct AVPicture {
+    attribute_deprecated
+    uint8_t *data[AV_NUM_DATA_POINTERS];    ///< pointers to the image data planes
+    attribute_deprecated
+    int linesize[AV_NUM_DATA_POINTERS];     ///< number of bytes per line
+} AVPicture;
+
+/**
+ * @}
+ */
+#endif /* FF_API_AVPICTURE */
+
+enum AVSubtitleType {
+    SUBTITLE_NONE,   ///< no subtitle type set
+
+    SUBTITLE_BITMAP, ///< A bitmap, pict will be set
+
+    /**
+     * Plain text, the text field must be set by the decoder and is
+     * authoritative. ass and pict fields may contain approximations.
+     */
+    SUBTITLE_TEXT,
+
+    /**
+     * Formatted text, the ass field must be set by the decoder and is
+     * authoritative. pict and text fields may contain approximations.
+     */
+    SUBTITLE_ASS,
+};
+
+#define AV_SUBTITLE_FLAG_FORCED 0x00000001
+
+typedef struct AVSubtitleRect {
+    int x;         ///< top left corner of pict, undefined when pict is not set
+    int y;         ///< top left corner of pict, undefined when pict is not set
+    int w;         ///< width of pict, undefined when pict is not set
+    int h;         ///< height of pict, undefined when pict is not set
+    int nb_colors; ///< number of colors in pict, undefined when pict is not set
+
+#if FF_API_AVPICTURE
+    /**
+     * @deprecated unused
+     */
+    attribute_deprecated
+    AVPicture pict;
+#endif /* FF_API_AVPICTURE */
+    /**
+     * data+linesize for the bitmap of this subtitle.
+     * Can be set for text/ass as well once they are rendered.
+     */
+    uint8_t *data[4];
+    int linesize[4];
+
+    enum AVSubtitleType type;
+
+    char *text; ///< 0 terminated plain UTF-8 text
+
+    /**
+     * 0 terminated ASS/SSA compatible event line.
+     * The presentation of this is unaffected by the other values in this
+     * struct.
+     */
+    char *ass;
+
+    int flags; ///< a combination of AV_SUBTITLE_FLAG_* values
+} AVSubtitleRect;
+
+/**
+ * A decoded subtitle event, consisting of zero or more rects plus display
+ * timing relative to the packet pts.
+ */
+typedef struct AVSubtitle {
+    uint16_t format;             /* 0 = graphics */
+    uint32_t start_display_time; /* relative to packet pts, in ms */
+    uint32_t end_display_time;   /* relative to packet pts, in ms */
+    unsigned num_rects;          ///< number of entries in rects
+    AVSubtitleRect **rects;      ///< array of num_rects subtitle rectangles
+    int64_t pts;                 ///< Same as packet pts, in AV_TIME_BASE
+} AVSubtitle;
+
+/**
+ * This struct describes the properties of an encoded stream.
+ *
+ * sizeof(AVCodecParameters) is not a part of the public ABI, this struct must
+ * be allocated with avcodec_parameters_alloc() and freed with
+ * avcodec_parameters_free().
+ */
+typedef struct AVCodecParameters {
+    /**
+     * General type of the encoded data.
+     */
+    enum AVMediaType codec_type;
+    /**
+     * Specific type of the encoded data (the codec used).
+     */
+    enum AVCodecID codec_id;
+    /**
+     * Additional information about the codec (corresponds to the AVI FOURCC).
+     */
+    uint32_t codec_tag;
+
+    /**
+     * Extra binary data needed for initializing the decoder, codec-dependent.
+     *
+     * Must be allocated with av_malloc() and will be freed by
+     * avcodec_parameters_free(). The allocated size of extradata must be at
+     * least extradata_size + AV_INPUT_BUFFER_PADDING_SIZE, with the padding
+     * bytes zeroed.
+     */
+    uint8_t *extradata;
+    /**
+     * Size of the extradata content in bytes.
+     */
+    int extradata_size;
+
+    /**
+     * - video: the pixel format, the value corresponds to enum AVPixelFormat.
+     * - audio: the sample format, the value corresponds to enum AVSampleFormat.
+     */
+    int format;
+
+    /**
+     * The average bitrate of the encoded data (in bits per second).
+     */
+    int64_t bit_rate;
+
+    /**
+     * The number of bits per sample in the coded words.
+     *
+     * This is basically the bitrate per sample. It is mandatory for a bunch of
+     * formats to actually decode them. It's the number of bits for one sample in
+     * the actual coded bitstream.
+     *
+     * This could be for example 4 for ADPCM.
+     * For PCM formats this matches bits_per_raw_sample.
+     * Can be 0.
+     */
+    int bits_per_coded_sample;
+
+    /**
+     * This is the number of valid bits in each output sample. If the
+     * sample format has more bits, the least significant bits are additional
+     * padding bits, which are always 0. Use right shifts to reduce the sample
+     * to its actual size. For example, audio formats with 24 bit samples will
+     * have bits_per_raw_sample set to 24, and format set to AV_SAMPLE_FMT_S32.
+     * To get the original sample use "(int32_t)sample >> 8".
+     *
+     * For ADPCM this might be 12 or 16 or similar.
+     * Can be 0.
+     */
+    int bits_per_raw_sample;
+
+    /**
+     * Codec-specific bitstream restrictions that the stream conforms to.
+     */
+    int profile;
+    int level;
+
+    /**
+     * Video only. The dimensions of the video frame in pixels.
+     */
+    int width;
+    int height;
+
+    /**
+     * Video only. The aspect ratio (width / height) which a single pixel
+     * should have when displayed.
+     *
+     * When the aspect ratio is unknown / undefined, the numerator should be
+     * set to 0 (the denominator may have any value).
+     */
+    AVRational sample_aspect_ratio;
+
+    /**
+     * Video only. The order of the fields in interlaced video.
+     */
+    enum AVFieldOrder field_order;
+
+    /**
+     * Video only. Additional colorspace characteristics.
+     */
+    enum AVColorRange color_range;
+    enum AVColorPrimaries color_primaries;
+    enum AVColorTransferCharacteristic color_trc;
+    enum AVColorSpace color_space;
+    enum AVChromaLocation chroma_location;
+
+    /**
+     * Video only. Number of delayed frames.
+     */
+    int video_delay;
+
+    /**
+     * Audio only. The channel layout bitmask. May be 0 if the channel layout is
+     * unknown or unspecified, otherwise the number of bits set must be equal to
+     * the channels field.
+     */
+    uint64_t channel_layout;
+    /**
+     * Audio only. The number of audio channels.
+     */
+    int channels;
+    /**
+     * Audio only. The number of audio samples per second.
+     */
+    int sample_rate;
+    /**
+     * Audio only. The number of bytes per coded audio frame, required by some
+     * formats.
+     *
+     * Corresponds to nBlockAlign in WAVEFORMATEX.
+     */
+    int block_align;
+    /**
+     * Audio only. Audio frame size, if known. Required by some formats to be static.
+     */
+    int frame_size;
+
+    /**
+     * Audio only. The amount of padding (in samples) inserted by the encoder at
+     * the beginning of the audio. I.e. this number of leading decoded samples
+     * must be discarded by the caller to get the original audio without leading
+     * padding.
+     */
+    int initial_padding;
+    /**
+     * Audio only. The amount of padding (in samples) appended by the encoder to
+     * the end of the audio. I.e. this number of decoded samples must be
+     * discarded by the caller from the end of the stream to get the original
+     * audio without any trailing padding.
+     */
+    int trailing_padding;
+    /**
+     * Audio only. Number of samples to skip after a discontinuity.
+     */
+    int seek_preroll;
+} AVCodecParameters;
+
+/**
+ * If c is NULL, returns the first registered codec,
+ * if c is non-NULL, returns the next registered codec after c,
+ * or NULL if c is the last one.
+ */
+AVCodec *av_codec_next(const AVCodec *c);
+
+/**
+ * Return the LIBAVCODEC_VERSION_INT constant.
+ */
+unsigned avcodec_version(void);
+
+/**
+ * Return the libavcodec build-time configuration.
+ */
+const char *avcodec_configuration(void);
+
+/**
+ * Return the libavcodec license.
+ */
+const char *avcodec_license(void);
+
+/**
+ * Register the codec codec and initialize libavcodec.
+ *
+ * @warning either this function or avcodec_register_all() must be called
+ * before any other libavcodec functions.
+ *
+ * @see avcodec_register_all()
+ */
+void avcodec_register(AVCodec *codec);
+
+/**
+ * Register all the codecs, parsers and bitstream filters which were enabled at
+ * configuration time. If you do not call this function you can select exactly
+ * which formats you want to support, by using the individual registration
+ * functions.
+ *
+ * @see avcodec_register
+ * @see av_register_codec_parser
+ * @see av_register_bitstream_filter
+ */
+void avcodec_register_all(void);
+
+/**
+ * Allocate an AVCodecContext and set its fields to default values. The
+ * resulting struct should be freed with avcodec_free_context().
+ *
+ * @param codec if non-NULL, allocate private data and initialize defaults
+ * for the given codec. It is illegal to then call avcodec_open2()
+ * with a different codec.
+ * If NULL, then the codec-specific defaults won't be initialized,
+ * which may result in suboptimal default settings (this is
+ * important mainly for encoders, e.g. libx264).
+ *
+ * @return An AVCodecContext filled with default values or NULL on failure.
+ */
+AVCodecContext *avcodec_alloc_context3(const AVCodec *codec);
+
+/**
+ * Free the codec context and everything associated with it and write NULL to
+ * the provided pointer.
+ */
+void avcodec_free_context(AVCodecContext **avctx);
+
+#if FF_API_GET_CONTEXT_DEFAULTS
+/**
+ * @deprecated This function should not be used, as closing and opening a codec
+ * context multiple time is not supported. A new codec context should be
+ * allocated for each new use.
+ */
+int avcodec_get_context_defaults3(AVCodecContext *s, const AVCodec *codec);
+#endif
+
+/**
+ * Get the AVClass for AVCodecContext. It can be used in combination with
+ * AV_OPT_SEARCH_FAKE_OBJ for examining options.
+ *
+ * @see av_opt_find().
+ */
+const AVClass *avcodec_get_class(void);
+
+#if FF_API_COPY_CONTEXT
+/**
+ * Get the AVClass for AVFrame. It can be used in combination with
+ * AV_OPT_SEARCH_FAKE_OBJ for examining options.
+ *
+ * @see av_opt_find().
+ */
+const AVClass *avcodec_get_frame_class(void);
+
+/**
+ * Get the AVClass for AVSubtitleRect. It can be used in combination with
+ * AV_OPT_SEARCH_FAKE_OBJ for examining options.
+ *
+ * @see av_opt_find().
+ */
+const AVClass *avcodec_get_subtitle_rect_class(void);
+
+/**
+ * Copy the settings of the source AVCodecContext into the destination
+ * AVCodecContext. The resulting destination codec context will be
+ * unopened, i.e. you are required to call avcodec_open2() before you
+ * can use this AVCodecContext to decode/encode video/audio data.
+ *
+ * @param dest target codec context, should be initialized with
+ * avcodec_alloc_context3(NULL), but otherwise uninitialized
+ * @param src source codec context
+ * @return AVERROR() on error (e.g. memory allocation error), 0 on success
+ *
+ * @deprecated The semantics of this function are ill-defined and it should not
+ * be used. If you need to transfer the stream parameters from one codec context
+ * to another, use an intermediate AVCodecParameters instance and the
+ * avcodec_parameters_from_context() / avcodec_parameters_to_context()
+ * functions.
+ */
+attribute_deprecated
+int avcodec_copy_context(AVCodecContext *dest, const AVCodecContext *src);
+#endif
+
+/**
+ * Allocate a new AVCodecParameters and set its fields to default values
+ * (unknown/invalid/0). The returned struct must be freed with
+ * avcodec_parameters_free().
+ */
+AVCodecParameters *avcodec_parameters_alloc(void);
+
+/**
+ * Free an AVCodecParameters instance and everything associated with it and
+ * write NULL to the supplied pointer.
+ */
+void avcodec_parameters_free(AVCodecParameters **par);
+
+/**
+ * Copy the contents of src to dst. Any allocated fields in dst are freed and
+ * replaced with newly allocated duplicates of the corresponding fields in src.
+ *
+ * @return >= 0 on success, a negative AVERROR code on failure.
+ */
+int avcodec_parameters_copy(AVCodecParameters *dst, const AVCodecParameters *src);
+
+/**
+ * Fill the parameters struct based on the values from the supplied codec
+ * context. Any allocated fields in par are freed and replaced with duplicates
+ * of the corresponding fields in codec.
+ *
+ * @return >= 0 on success, a negative AVERROR code on failure
+ */
+int avcodec_parameters_from_context(AVCodecParameters *par,
+ const AVCodecContext *codec);
+
+/**
+ * Fill the codec context based on the values from the supplied codec
+ * parameters. Any allocated fields in codec that have a corresponding field in
+ * par are freed and replaced with duplicates of the corresponding field in par.
+ * Fields in codec that do not have a counterpart in par are not touched.
+ *
+ * @return >= 0 on success, a negative AVERROR code on failure.
+ */
+int avcodec_parameters_to_context(AVCodecContext *codec,
+ const AVCodecParameters *par);
+
+/**
+ * Initialize the AVCodecContext to use the given AVCodec. Prior to using this
+ * function the context has to be allocated with avcodec_alloc_context3().
+ *
+ * The functions avcodec_find_decoder_by_name(), avcodec_find_encoder_by_name(),
+ * avcodec_find_decoder() and avcodec_find_encoder() provide an easy way for
+ * retrieving a codec.
+ *
+ * @warning This function is not thread safe!
+ *
+ * @note Always call this function before using decoding routines (such as
+ * @ref avcodec_receive_frame()).
+ *
+ * @code
+ * avcodec_register_all();
+ * av_dict_set(&opts, "b", "2.5M", 0);
+ * codec = avcodec_find_decoder(AV_CODEC_ID_H264);
+ * if (!codec)
+ * exit(1);
+ *
+ * context = avcodec_alloc_context3(codec);
+ *
+ * if (avcodec_open2(context, codec, opts) < 0)
+ * exit(1);
+ * @endcode
+ *
+ * @param avctx The context to initialize.
+ * @param codec The codec to open this context for. If a non-NULL codec has been
+ *              previously passed to avcodec_alloc_context3() for this
+ *              context, then this parameter MUST be either NULL or
+ * equal to the previously passed codec.
+ * @param options A dictionary filled with AVCodecContext and codec-private options.
+ * On return this object will be filled with options that were not found.
+ *
+ * @return zero on success, a negative value on error
+ * @see avcodec_alloc_context3(), avcodec_find_decoder(), avcodec_find_encoder(),
+ * av_dict_set(), av_opt_find().
+ */
+int avcodec_open2(AVCodecContext *avctx, const AVCodec *codec, AVDictionary **options);
+
+/**
+ * Close a given AVCodecContext and free all the data associated with it
+ * (but not the AVCodecContext itself).
+ *
+ * Calling this function on an AVCodecContext that hasn't been opened will free
+ * the codec-specific data allocated in avcodec_alloc_context3() with a non-NULL
+ * codec. Subsequent calls will do nothing.
+ *
+ * @note Do not use this function. Use avcodec_free_context() to destroy a
+ * codec context (either open or closed). Opening and closing a codec context
+ * multiple times is not supported anymore -- use multiple codec contexts
+ * instead.
+ */
+int avcodec_close(AVCodecContext *avctx);
+
+/**
+ * Free all allocated data in the given subtitle struct.
+ *
+ * @param sub AVSubtitle to free.
+ */
+void avsubtitle_free(AVSubtitle *sub);
+
+/**
+ * @}
+ */
+
+/**
+ * @addtogroup lavc_packet
+ * @{
+ */
+
+/**
+ * Allocate an AVPacket and set its fields to default values. The resulting
+ * struct must be freed using av_packet_free().
+ *
+ * @return An AVPacket filled with default values or NULL on failure.
+ *
+ * @note this only allocates the AVPacket itself, not the data buffers. Those
+ * must be allocated through other means such as av_new_packet.
+ *
+ * @see av_new_packet
+ */
+AVPacket *av_packet_alloc(void);
+
+/**
+ * Create a new packet that references the same data as src.
+ *
+ * This is a shortcut for av_packet_alloc()+av_packet_ref().
+ *
+ * @return newly created AVPacket on success, NULL on error.
+ *
+ * @see av_packet_alloc
+ * @see av_packet_ref
+ */
+AVPacket *av_packet_clone(AVPacket *src);
+
+/**
+ * Free the packet, if the packet is reference counted, it will be
+ * unreferenced first.
+ *
+ * @param pkt packet to be freed. The pointer will be set to NULL.
+ * @note passing NULL is a no-op.
+ */
+void av_packet_free(AVPacket **pkt);
+
+/**
+ * Initialize optional fields of a packet with default values.
+ *
+ * Note, this does not touch the data and size members, which have to be
+ * initialized separately.
+ *
+ * @param pkt packet
+ */
+void av_init_packet(AVPacket *pkt);
+
+/**
+ * Allocate the payload of a packet and initialize its fields with
+ * default values.
+ *
+ * @param pkt packet
+ * @param size wanted payload size
+ * @return 0 if OK, AVERROR_xxx otherwise
+ */
+int av_new_packet(AVPacket *pkt, int size);
+
+/**
+ * Reduce packet size, correctly zeroing padding
+ *
+ * @param pkt packet
+ * @param size new size
+ */
+void av_shrink_packet(AVPacket *pkt, int size);
+
+/**
+ * Increase packet size, correctly zeroing padding
+ *
+ * @param pkt packet
+ * @param grow_by number of bytes by which to increase the size of the packet
+ */
+int av_grow_packet(AVPacket *pkt, int grow_by);
+
+/**
+ * Initialize a reference-counted packet from av_malloc()ed data.
+ *
+ * @param pkt packet to be initialized. This function will set the data, size,
+ * buf and destruct fields, all others are left untouched.
+ * @param data Data allocated by av_malloc() to be used as packet data. If this
+ * function returns successfully, the data is owned by the underlying AVBuffer.
+ * The caller may not access the data through other means.
+ * @param size size of data in bytes, without the padding. I.e. the full buffer
+ * size is assumed to be size + AV_INPUT_BUFFER_PADDING_SIZE.
+ *
+ * @return 0 on success, a negative AVERROR on error
+ */
+int av_packet_from_data(AVPacket *pkt, uint8_t *data, int size);
+
+#if FF_API_AVPACKET_OLD_API
+/**
+ * @warning This is a hack - the packet memory allocation stuff is broken. The
+ * packet is allocated if it was not really allocated.
+ *
+ * @deprecated Use av_packet_ref
+ */
+attribute_deprecated
+int av_dup_packet(AVPacket *pkt);
+/**
+ * Copy packet, including contents
+ *
+ * @return 0 on success, negative AVERROR on fail
+ */
+int av_copy_packet(AVPacket *dst, const AVPacket *src);
+
+/**
+ * Copy packet side data
+ *
+ * @return 0 on success, negative AVERROR on fail
+ */
+int av_copy_packet_side_data(AVPacket *dst, const AVPacket *src);
+
+/**
+ * Free a packet.
+ *
+ * @deprecated Use av_packet_unref
+ *
+ * @param pkt packet to free
+ */
+attribute_deprecated
+void av_free_packet(AVPacket *pkt);
+#endif
+/**
+ * Allocate new information of a packet.
+ *
+ * @param pkt packet
+ * @param type side information type
+ * @param size side information size
+ * @return pointer to fresh allocated data or NULL otherwise
+ */
+uint8_t* av_packet_new_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
+ int size);
+
+/**
+ * Wrap an existing array as a packet side data.
+ *
+ * @param pkt packet
+ * @param type side information type
+ * @param data the side data array. It must be allocated with the av_malloc()
+ * family of functions. The ownership of the data is transferred to
+ * pkt.
+ * @param size side information size
+ * @return a non-negative number on success, a negative AVERROR code on
+ * failure. On failure, the packet is unchanged and the data remains
+ * owned by the caller.
+ */
+int av_packet_add_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
+ uint8_t *data, size_t size);
+
+/**
+ * Shrink the already allocated side data buffer
+ *
+ * @param pkt packet
+ * @param type side information type
+ * @param size new side information size
+ * @return 0 on success, < 0 on failure
+ */
+int av_packet_shrink_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
+ int size);
+
+/**
+ * Get side information from packet.
+ *
+ * @param pkt packet
+ * @param type desired side information type
+ * @param size pointer for side information size to store (optional)
+ * @return pointer to data if present or NULL otherwise
+ */
+uint8_t* av_packet_get_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
+ int *size);
+
+int av_packet_merge_side_data(AVPacket *pkt);
+
+int av_packet_split_side_data(AVPacket *pkt);
+
+const char *av_packet_side_data_name(enum AVPacketSideDataType type);
+
+/**
+ * Pack a dictionary for use in side_data.
+ *
+ * @param dict The dictionary to pack.
+ * @param size pointer to store the size of the returned data
+ * @return pointer to data if successful, NULL otherwise
+ */
+uint8_t *av_packet_pack_dictionary(AVDictionary *dict, int *size);
+/**
+ * Unpack a dictionary from side_data.
+ *
+ * @param data data from side_data
+ * @param size size of the data
+ * @param dict the metadata storage dictionary
+ * @return 0 on success, < 0 on failure
+ */
+int av_packet_unpack_dictionary(const uint8_t *data, int size, AVDictionary **dict);
+
+
+/**
+ * Convenience function to free all the side data stored.
+ * All the other fields stay untouched.
+ *
+ * @param pkt packet
+ */
+void av_packet_free_side_data(AVPacket *pkt);
+
+/**
+ * Setup a new reference to the data described by a given packet
+ *
+ * If src is reference-counted, setup dst as a new reference to the
+ * buffer in src. Otherwise allocate a new buffer in dst and copy the
+ * data from src into it.
+ *
+ * All the other fields are copied from src.
+ *
+ * @see av_packet_unref
+ *
+ * @param dst Destination packet
+ * @param src Source packet
+ *
+ * @return 0 on success, a negative AVERROR on error.
+ */
+int av_packet_ref(AVPacket *dst, const AVPacket *src);
+
+/**
+ * Wipe the packet.
+ *
+ * Unreference the buffer referenced by the packet and reset the
+ * remaining packet fields to their default values.
+ *
+ * @param pkt The packet to be unreferenced.
+ */
+void av_packet_unref(AVPacket *pkt);
+
+/**
+ * Move every field in src to dst and reset src.
+ *
+ * @see av_packet_unref
+ *
+ * @param src Source packet, will be reset
+ * @param dst Destination packet
+ */
+void av_packet_move_ref(AVPacket *dst, AVPacket *src);
+
+/**
+ * Copy only "properties" fields from src to dst.
+ *
+ * Properties for the purpose of this function are all the fields
+ * beside those related to the packet data (buf, data, size)
+ *
+ * @param dst Destination packet
+ * @param src Source packet
+ *
+ * @return 0 on success AVERROR on failure.
+ */
+int av_packet_copy_props(AVPacket *dst, const AVPacket *src);
+
+/**
+ * Convert valid timing fields (timestamps / durations) in a packet from one
+ * timebase to another. Timestamps with unknown values (AV_NOPTS_VALUE) will be
+ * ignored.
+ *
+ * @param pkt packet on which the conversion will be performed
+ * @param tb_src source timebase, in which the timing fields in pkt are
+ * expressed
+ * @param tb_dst destination timebase, to which the timing fields will be
+ * converted
+ */
+void av_packet_rescale_ts(AVPacket *pkt, AVRational tb_src, AVRational tb_dst);
+
+/**
+ * @}
+ */
+
+/**
+ * @addtogroup lavc_decoding
+ * @{
+ */
+
+/**
+ * Find a registered decoder with a matching codec ID.
+ *
+ * @param id AVCodecID of the requested decoder
+ * @return A decoder if one was found, NULL otherwise.
+ */
+AVCodec *avcodec_find_decoder(enum AVCodecID id);
+
+/**
+ * Find a registered decoder with the specified name.
+ *
+ * @param name name of the requested decoder
+ * @return A decoder if one was found, NULL otherwise.
+ */
+AVCodec *avcodec_find_decoder_by_name(const char *name);
+
+/**
+ * The default callback for AVCodecContext.get_buffer2(). It is made public so
+ * it can be called by custom get_buffer2() implementations for decoders without
+ * AV_CODEC_CAP_DR1 set.
+ */
+int avcodec_default_get_buffer2(AVCodecContext *s, AVFrame *frame, int flags);
+
+#if FF_API_EMU_EDGE
+/**
+ * Return the amount of padding in pixels which the get_buffer callback must
+ * provide around the edge of the image for codecs which do not have the
+ * CODEC_FLAG_EMU_EDGE flag.
+ *
+ * @return Required padding in pixels.
+ *
+ * @deprecated CODEC_FLAG_EMU_EDGE is deprecated, so this function is no longer
+ * needed
+ */
+attribute_deprecated
+unsigned avcodec_get_edge_width(void);
+#endif
+
+/**
+ * Modify width and height values so that they will result in a memory
+ * buffer that is acceptable for the codec if you do not use any horizontal
+ * padding.
+ *
+ * May only be used if a codec with AV_CODEC_CAP_DR1 has been opened.
+ */
+void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height);
+
+/**
+ * Modify width and height values so that they will result in a memory
+ * buffer that is acceptable for the codec if you also ensure that all
+ * line sizes are a multiple of the respective linesize_align[i].
+ *
+ * May only be used if a codec with AV_CODEC_CAP_DR1 has been opened.
+ */
+void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height,
+ int linesize_align[AV_NUM_DATA_POINTERS]);
+
+/**
+ * Converts AVChromaLocation to swscale x/y chroma position.
+ *
+ * The positions represent the chroma (0,0) position in a coordinates system
+ * with luma (0,0) representing the origin and luma(1,1) representing 256,256
+ *
+ * @param xpos horizontal chroma sample position
+ * @param ypos vertical chroma sample position
+ */
+int avcodec_enum_to_chroma_pos(int *xpos, int *ypos, enum AVChromaLocation pos);
+
+/**
+ * Converts swscale x/y chroma position to AVChromaLocation.
+ *
+ * The positions represent the chroma (0,0) position in a coordinates system
+ * with luma (0,0) representing the origin and luma(1,1) representing 256,256
+ *
+ * @param xpos horizontal chroma sample position
+ * @param ypos vertical chroma sample position
+ */
+enum AVChromaLocation avcodec_chroma_pos_to_enum(int xpos, int ypos);
+
+/**
+ * Decode the audio frame of size avpkt->size from avpkt->data into frame.
+ *
+ * Some decoders may support multiple frames in a single AVPacket. Such
+ * decoders would then just decode the first frame and the return value would be
+ * less than the packet size. In this case, avcodec_decode_audio4 has to be
+ * called again with an AVPacket containing the remaining data in order to
+ * decode the second frame, etc... Even if no frames are returned, the packet
+ * needs to be fed to the decoder with remaining data until it is completely
+ * consumed or an error occurs.
+ *
+ * Some decoders (those marked with AV_CODEC_CAP_DELAY) have a delay between input
+ * and output. This means that for some packets they will not immediately
+ * produce decoded output and need to be flushed at the end of decoding to get
+ * all the decoded data. Flushing is done by calling this function with packets
+ * with avpkt->data set to NULL and avpkt->size set to 0 until it stops
+ * returning samples. It is safe to flush even those decoders that are not
+ * marked with AV_CODEC_CAP_DELAY, then no samples will be returned.
+ *
+ * @warning The input buffer, avpkt->data must be AV_INPUT_BUFFER_PADDING_SIZE
+ * larger than the actual read bytes because some optimized bitstream
+ * readers read 32 or 64 bits at once and could read over the end.
+ *
+ * @note The AVCodecContext MUST have been opened with @ref avcodec_open2()
+ * before packets may be fed to the decoder.
+ *
+ * @param avctx the codec context
+ * @param[out] frame The AVFrame in which to store decoded audio samples.
+ * The decoder will allocate a buffer for the decoded frame by
+ * calling the AVCodecContext.get_buffer2() callback.
+ * When AVCodecContext.refcounted_frames is set to 1, the frame is
+ * reference counted and the returned reference belongs to the
+ * caller. The caller must release the frame using av_frame_unref()
+ * when the frame is no longer needed. The caller may safely write
+ * to the frame if av_frame_is_writable() returns 1.
+ * When AVCodecContext.refcounted_frames is set to 0, the returned
+ * reference belongs to the decoder and is valid only until the
+ * next call to this function or until closing or flushing the
+ * decoder. The caller may not write to it.
+ * @param[out] got_frame_ptr Zero if no frame could be decoded, otherwise it is
+ * non-zero. Note that this field being set to zero
+ * does not mean that an error has occurred. For
+ * decoders with AV_CODEC_CAP_DELAY set, no given decode
+ * call is guaranteed to produce a frame.
+ * @param[in] avpkt The input AVPacket containing the input buffer.
+ * At least avpkt->data and avpkt->size should be set. Some
+ * decoders might also require additional fields to be set.
+ * @return A negative error code is returned if an error occurred during
+ * decoding, otherwise the number of bytes consumed from the input
+ * AVPacket is returned.
+ *
+ * @deprecated Use avcodec_send_packet() and avcodec_receive_frame().
+ */
+attribute_deprecated
+int avcodec_decode_audio4(AVCodecContext *avctx, AVFrame *frame,
+ int *got_frame_ptr, const AVPacket *avpkt);
+
+/**
+ * Decode the video frame of size avpkt->size from avpkt->data into picture.
+ * Some decoders may support multiple frames in a single AVPacket, such
+ * decoders would then just decode the first frame.
+ *
+ * @warning The input buffer must be AV_INPUT_BUFFER_PADDING_SIZE larger than
+ * the actual read bytes because some optimized bitstream readers read 32 or 64
+ * bits at once and could read over the end.
+ *
+ * @warning The end of the input buffer buf should be set to 0 to ensure that
+ * no overreading happens for damaged MPEG streams.
+ *
+ * @note Codecs which have the AV_CODEC_CAP_DELAY capability set have a delay
+ * between input and output, these need to be fed with avpkt->data=NULL,
+ * avpkt->size=0 at the end to return the remaining frames.
+ *
+ * @note The AVCodecContext MUST have been opened with @ref avcodec_open2()
+ * before packets may be fed to the decoder.
+ *
+ * @param avctx the codec context
+ * @param[out] picture The AVFrame in which the decoded video frame will be stored.
+ * Use av_frame_alloc() to get an AVFrame. The codec will
+ * allocate memory for the actual bitmap by calling the
+ * AVCodecContext.get_buffer2() callback.
+ * When AVCodecContext.refcounted_frames is set to 1, the frame is
+ * reference counted and the returned reference belongs to the
+ * caller. The caller must release the frame using av_frame_unref()
+ * when the frame is no longer needed. The caller may safely write
+ * to the frame if av_frame_is_writable() returns 1.
+ * When AVCodecContext.refcounted_frames is set to 0, the returned
+ * reference belongs to the decoder and is valid only until the
+ * next call to this function or until closing or flushing the
+ * decoder. The caller may not write to it.
+ *
+ * @param[in] avpkt The input AVPacket containing the input buffer.
+ * You can create such packet with av_init_packet() and by then setting
+ * data and size, some decoders might in addition need other fields like
+ * flags&AV_PKT_FLAG_KEY. All decoders are designed to use the least
+ * fields possible.
+ * @param[in,out] got_picture_ptr Zero if no frame could be decompressed, otherwise, it is nonzero.
+ * @return On error a negative value is returned, otherwise the number of bytes
+ * used or zero if no frame could be decompressed.
+ *
+ * @deprecated Use avcodec_send_packet() and avcodec_receive_frame().
+ */
+attribute_deprecated
+int avcodec_decode_video2(AVCodecContext *avctx, AVFrame *picture,
+ int *got_picture_ptr,
+ const AVPacket *avpkt);
+
+/**
+ * Decode a subtitle message.
+ * Return a negative value on error, otherwise return the number of bytes used.
+ * If no subtitle could be decompressed, got_sub_ptr is zero.
+ * Otherwise, the subtitle is stored in *sub.
+ * Note that AV_CODEC_CAP_DR1 is not available for subtitle codecs. This is for
+ * simplicity, because the performance difference is expected to be negligible
+ * and reusing a get_buffer written for video codecs would probably perform badly
+ * due to a potentially very different allocation pattern.
+ *
+ * Some decoders (those marked with CODEC_CAP_DELAY) have a delay between input
+ * and output. This means that for some packets they will not immediately
+ * produce decoded output and need to be flushed at the end of decoding to get
+ * all the decoded data. Flushing is done by calling this function with packets
+ * with avpkt->data set to NULL and avpkt->size set to 0 until it stops
+ * returning subtitles. It is safe to flush even those decoders that are not
+ * marked with CODEC_CAP_DELAY, then no subtitles will be returned.
+ *
+ * @note The AVCodecContext MUST have been opened with @ref avcodec_open2()
+ * before packets may be fed to the decoder.
+ *
+ * @param avctx the codec context
+ * @param[out] sub The preallocated AVSubtitle in which the decoded subtitle will be stored,
+ * must be freed with avsubtitle_free if *got_sub_ptr is set.
+ * @param[in,out] got_sub_ptr Zero if no subtitle could be decompressed, otherwise, it is nonzero.
+ * @param[in] avpkt The input AVPacket containing the input buffer.
+ */
+int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
+ int *got_sub_ptr,
+ AVPacket *avpkt);
+
+/**
+ * Supply raw packet data as input to a decoder.
+ *
+ * Internally, this call will copy relevant AVCodecContext fields, which can
+ * influence decoding per-packet, and apply them when the packet is actually
+ * decoded. (For example AVCodecContext.skip_frame, which might direct the
+ * decoder to drop the frame contained by the packet sent with this function.)
+ *
+ * @warning The input buffer, avpkt->data must be AV_INPUT_BUFFER_PADDING_SIZE
+ * larger than the actual read bytes because some optimized bitstream
+ * readers read 32 or 64 bits at once and could read over the end.
+ *
+ * @warning Do not mix this API with the legacy API (like avcodec_decode_video2())
+ * on the same AVCodecContext. It will return unexpected results now
+ * or in future libavcodec versions.
+ *
+ * @note The AVCodecContext MUST have been opened with @ref avcodec_open2()
+ * before packets may be fed to the decoder.
+ *
+ * @param avctx codec context
+ * @param[in] avpkt The input AVPacket. Usually, this will be a single video
+ * frame, or several complete audio frames.
+ * Ownership of the packet remains with the caller, and the
+ * decoder will not write to the packet. The decoder may create
+ * a reference to the packet data (or copy it if the packet is
+ * not reference-counted).
+ * Unlike with older APIs, the packet is always fully consumed,
+ * and if it contains multiple frames (e.g. some audio codecs),
+ * will require you to call avcodec_receive_frame() multiple
+ * times afterwards before you can send a new packet.
+ * It can be NULL (or an AVPacket with data set to NULL and
+ * size set to 0); in this case, it is considered a flush
+ * packet, which signals the end of the stream. Sending the
+ * first flush packet will return success. Subsequent ones are
+ * unnecessary and will return AVERROR_EOF. If the decoder
+ * still has frames buffered, it will return them after sending
+ * a flush packet.
+ *
+ * @return 0 on success, otherwise negative error code:
+ * AVERROR(EAGAIN): input is not accepted right now - the packet must be
+ * resent after trying to read output
+ * AVERROR_EOF: the decoder has been flushed, and no new packets can
+ * be sent to it (also returned if more than 1 flush
+ * packet is sent)
+ * AVERROR(EINVAL): codec not opened, it is an encoder, or requires flush
+ * AVERROR(ENOMEM): failed to add packet to internal queue, or similar
+ * other errors: legitimate decoding errors
+ */
+int avcodec_send_packet(AVCodecContext *avctx, const AVPacket *avpkt);
+
+/**
+ * Return decoded output data from a decoder.
+ *
+ * @param avctx codec context
+ * @param frame This will be set to a reference-counted video or audio
+ * frame (depending on the decoder type) allocated by the
+ * decoder. Note that the function will always call
+ * av_frame_unref(frame) before doing anything else.
+ *
+ * @return
+ * 0: success, a frame was returned
+ * AVERROR(EAGAIN): output is not available right now - user must try
+ * to send new input
+ * AVERROR_EOF: the decoder has been fully flushed, and there will be
+ * no more output frames
+ * AVERROR(EINVAL): codec not opened, or it is an encoder
+ * other negative values: legitimate decoding errors
+ */
+int avcodec_receive_frame(AVCodecContext *avctx, AVFrame *frame);
+
+/**
+ * Supply a raw video or audio frame to the encoder. Use avcodec_receive_packet()
+ * to retrieve buffered output packets.
+ *
+ * @param avctx codec context
+ * @param[in] frame AVFrame containing the raw audio or video frame to be encoded.
+ * Ownership of the frame remains with the caller, and the
+ * encoder will not write to the frame. The encoder may create
+ * a reference to the frame data (or copy it if the frame is
+ * not reference-counted).
+ * It can be NULL, in which case it is considered a flush
+ * packet. This signals the end of the stream. If the encoder
+ * still has packets buffered, it will return them after this
+ * call. Once flushing mode has been entered, additional flush
+ * packets are ignored, and sending frames will return
+ * AVERROR_EOF.
+ *
+ * For audio:
+ * If AV_CODEC_CAP_VARIABLE_FRAME_SIZE is set, then each frame
+ * can have any number of samples.
+ * If it is not set, frame->nb_samples must be equal to
+ * avctx->frame_size for all frames except the last.
+ * The final frame may be smaller than avctx->frame_size.
+ * @return 0 on success, otherwise negative error code:
+ * AVERROR(EAGAIN): input is not accepted right now - the frame must be
+ * resent after trying to read output packets
+ * AVERROR_EOF: the encoder has been flushed, and no new frames can
+ * be sent to it
+ * AVERROR(EINVAL): codec not opened, refcounted_frames not set, it is a
+ * decoder, or requires flush
+ * AVERROR(ENOMEM): failed to add packet to internal queue, or similar
+ * other errors: legitimate decoding errors
+ */
+int avcodec_send_frame(AVCodecContext *avctx, const AVFrame *frame);
+
+/**
+ * Read encoded data from the encoder.
+ *
+ * @param avctx codec context
+ * @param avpkt This will be set to a reference-counted packet allocated by the
+ *              encoder. Note that the function will always call
+ *              av_packet_unref(avpkt) before doing anything else.
+ * @return 0 on success, otherwise negative error code:
+ * AVERROR(EAGAIN): output is not available right now - user must try
+ * to send input
+ * AVERROR_EOF: the encoder has been fully flushed, and there will be
+ * no more output packets
+ *      AVERROR(EINVAL):   codec not opened, or it is a decoder
+ *      other errors: legitimate encoding errors
+ */
+int avcodec_receive_packet(AVCodecContext *avctx, AVPacket *avpkt);
+
+
+/**
+ * @defgroup lavc_parsing Frame parsing
+ * @{
+ */
+
+enum AVPictureStructure {
+    AV_PICTURE_STRUCTURE_UNKNOWN,      ///< unknown
+    AV_PICTURE_STRUCTURE_TOP_FIELD,    ///< coded as top field
+    AV_PICTURE_STRUCTURE_BOTTOM_FIELD, ///< coded as bottom field
+    AV_PICTURE_STRUCTURE_FRAME,        ///< coded as frame
+};
+
+typedef struct AVCodecParserContext {
+    void *priv_data;              ///< parser-private state; presumably sized by AVCodecParser.priv_data_size — TODO confirm
+    struct AVCodecParser *parser; ///< the AVCodecParser this context is an instance of
+    int64_t frame_offset; /* offset of the current frame */
+    int64_t cur_offset; /* current offset
+                           (incremented by each av_parser_parse()) */
+    int64_t next_frame_offset; /* offset of the next frame */
+    /* video info */
+    int pict_type; /* XXX: Put it back in AVCodecContext. */
+    /**
+     * This field is used for proper frame duration computation in lavf.
+     * It signals, how much longer the frame duration of the current frame
+     * is compared to normal frame duration.
+     *
+     * frame_duration = (1 + repeat_pict) * time_base
+     *
+     * It is used by codecs like H.264 to display telecined material.
+     */
+    int repeat_pict; /* XXX: Put it back in AVCodecContext. */
+    int64_t pts;     /* pts of the current frame */
+    int64_t dts;     /* dts of the current frame */
+
+    /* private data */
+    int64_t last_pts;    // NOTE(review): looks like the pts of the previous frame — confirm
+    int64_t last_dts;    // NOTE(review): looks like the dts of the previous frame — confirm
+    int fetch_timestamp; // nonzero when a timestamp should be fetched for the next frame — TODO confirm
+
+#define AV_PARSER_PTS_NB 4
+    // Ring buffer of the AV_PARSER_PTS_NB most recent frame offsets/timestamps,
+    // presumably used to map parsed frames back to input timestamps — TODO confirm.
+    int cur_frame_start_index;
+    int64_t cur_frame_offset[AV_PARSER_PTS_NB];
+    int64_t cur_frame_pts[AV_PARSER_PTS_NB];
+    int64_t cur_frame_dts[AV_PARSER_PTS_NB];
+
+    int flags; ///< combination of the PARSER_FLAG_* values below
+#define PARSER_FLAG_COMPLETE_FRAMES           0x0001
+#define PARSER_FLAG_ONCE                      0x0002
+/// Set if the parser has a valid file offset
+#define PARSER_FLAG_FETCHED_OFFSET            0x0004
+#define PARSER_FLAG_USE_CODEC_TS              0x1000
+
+    int64_t offset;      ///< byte offset from the start of the packet
+    int64_t cur_frame_end[AV_PARSER_PTS_NB]; ///< end offsets corresponding to cur_frame_offset[] — TODO confirm
+
+    /**
+     * Set by parser to 1 for key frames and 0 for non-key frames.
+     * It is initialized to -1, so if the parser doesn't set this flag,
+     * old-style fallback using AV_PICTURE_TYPE_I picture type as key frames
+     * will be used.
+     */
+    int key_frame;
+
+#if FF_API_CONVERGENCE_DURATION
+    /**
+     * @deprecated unused
+     */
+    attribute_deprecated
+    int64_t convergence_duration;
+#endif
+
+    // Timestamp generation support:
+    /**
+     * Synchronization point for start of timestamp generation.
+     *
+     * Set to >0 for sync point, 0 for no sync point and <0 for undefined
+     * (default).
+     *
+     * For example, this corresponds to presence of H.264 buffering period
+     * SEI message.
+     */
+    int dts_sync_point;
+
+    /**
+     * Offset of the current timestamp against last timestamp sync point in
+     * units of AVCodecContext.time_base.
+     *
+     * Set to INT_MIN when dts_sync_point unused. Otherwise, it must
+     * contain a valid timestamp offset.
+     *
+     * Note that the timestamp of sync point has usually a nonzero
+     * dts_ref_dts_delta, which refers to the previous sync point. Offset of
+     * the next frame after timestamp sync point will be usually 1.
+     *
+     * For example, this corresponds to H.264 cpb_removal_delay.
+     */
+    int dts_ref_dts_delta;
+
+    /**
+     * Presentation delay of current frame in units of AVCodecContext.time_base.
+     *
+     * Set to INT_MIN when dts_sync_point unused. Otherwise, it must
+     * contain valid non-negative timestamp delta (presentation time of a frame
+     * must not lie in the past).
+     *
+     * This delay represents the difference between decoding and presentation
+     * time of the frame.
+     *
+     * For example, this corresponds to H.264 dpb_output_delay.
+     */
+    int pts_dts_delta;
+
+    /**
+     * Position of the packet in file.
+     *
+     * Analogous to cur_frame_pts/dts
+     */
+    int64_t cur_frame_pos[AV_PARSER_PTS_NB];
+
+    /**
+     * Byte position of currently parsed frame in stream.
+     */
+    int64_t pos;
+
+    /**
+     * Previous frame byte position.
+     */
+    int64_t last_pos;
+
+    /**
+     * Duration of the current frame.
+     * For audio, this is in units of 1 / AVCodecContext.sample_rate.
+     * For all other types, this is in units of AVCodecContext.time_base.
+     */
+    int duration;
+
+    enum AVFieldOrder field_order;
+
+    /**
+     * Indicate whether a picture is coded as a frame, top field or bottom field.
+     *
+     * For example, H.264 field_pic_flag equal to 0 corresponds to
+     * AV_PICTURE_STRUCTURE_FRAME. An H.264 picture with field_pic_flag
+     * equal to 1 and bottom_field_flag equal to 0 corresponds to
+     * AV_PICTURE_STRUCTURE_TOP_FIELD.
+     */
+    enum AVPictureStructure picture_structure;
+
+    /**
+     * Picture number incremented in presentation or output order.
+     * This field may be reinitialized at the first picture of a new sequence.
+     *
+     * For example, this corresponds to H.264 PicOrderCnt.
+     */
+    int output_picture_number;
+
+    /**
+     * Dimensions of the decoded video intended for presentation.
+     */
+    int width;
+    int height;
+
+    /**
+     * Dimensions of the coded video.
+     */
+    int coded_width;
+    int coded_height;
+
+    /**
+     * The format of the coded data, corresponds to enum AVPixelFormat for video
+     * and to enum AVSampleFormat for audio.
+     *
+     * Note that a decoder can have considerable freedom in how exactly it
+     * decodes the data, so the format reported here might be different from the
+     * one returned by a decoder.
+     */
+    int format;
+} AVCodecParserContext;
+
+typedef struct AVCodecParser {
+    int codec_ids[5]; /* several codec IDs are permitted */
+    int priv_data_size; ///< size of AVCodecParserContext.priv_data to allocate — TODO confirm
+    int (*parser_init)(AVCodecParserContext *s);
+    /* This callback never returns an error, a negative value means that
+     * the frame start was in a previous packet. */
+    int (*parser_parse)(AVCodecParserContext *s,
+                        AVCodecContext *avctx,
+                        const uint8_t **poutbuf, int *poutbuf_size,
+                        const uint8_t *buf, int buf_size);
+    void (*parser_close)(AVCodecParserContext *s);
+    int (*split)(AVCodecContext *avctx, const uint8_t *buf, int buf_size);
+    struct AVCodecParser *next; ///< next registered parser — presumably for av_parser_next() iteration; confirm
+} AVCodecParser;
+
+AVCodecParser *av_parser_next(const AVCodecParser *c);
+
+void av_register_codec_parser(AVCodecParser *parser);
+AVCodecParserContext *av_parser_init(int codec_id);
+
+/**
+ * Parse a packet.
+ *
+ * @param s parser context.
+ * @param avctx codec context.
+ * @param poutbuf set to pointer to parsed buffer or NULL if not yet finished.
+ * @param poutbuf_size set to size of parsed buffer or zero if not yet finished.
+ * @param buf input buffer.
+ * @param buf_size input length; to signal EOF, this should be 0 (so that the last frame can be output).
+ * @param pts input presentation timestamp.
+ * @param dts input decoding timestamp.
+ * @param pos input byte position in stream.
+ * @return the number of bytes of the input bitstream used.
+ *
+ * Example:
+ * @code
+ * while(in_len){
+ * len = av_parser_parse2(myparser, AVCodecContext, &data, &size,
+ * in_data, in_len,
+ * pts, dts, pos);
+ * in_data += len;
+ * in_len -= len;
+ *
+ * if(size)
+ * decode_frame(data, size);
+ * }
+ * @endcode
+ */
+int av_parser_parse2(AVCodecParserContext *s,
+ AVCodecContext *avctx,
+ uint8_t **poutbuf, int *poutbuf_size,
+ const uint8_t *buf, int buf_size,
+ int64_t pts, int64_t dts,
+ int64_t pos);
+
+/**
+ * @return 0 if the output buffer is a subset of the input, 1 if it is allocated and must be freed
+ * @deprecated use AVBitStreamFilter
+ */
+int av_parser_change(AVCodecParserContext *s,
+ AVCodecContext *avctx,
+ uint8_t **poutbuf, int *poutbuf_size,
+ const uint8_t *buf, int buf_size, int keyframe);
+void av_parser_close(AVCodecParserContext *s);
+
+/**
+ * @}
+ * @}
+ */
+
+/**
+ * @addtogroup lavc_encoding
+ * @{
+ */
+
+/**
+ * Find a registered encoder with a matching codec ID.
+ *
+ * @param id AVCodecID of the requested encoder
+ * @return An encoder if one was found, NULL otherwise.
+ */
+AVCodec *avcodec_find_encoder(enum AVCodecID id);
+
+/**
+ * Find a registered encoder with the specified name.
+ *
+ * @param name name of the requested encoder
+ * @return An encoder if one was found, NULL otherwise.
+ */
+AVCodec *avcodec_find_encoder_by_name(const char *name);
+
+/**
+ * Encode a frame of audio.
+ *
+ * Takes input samples from frame and writes the next output packet, if
+ * available, to avpkt. The output packet does not necessarily contain data for
+ * the most recent frame, as encoders can delay, split, and combine input frames
+ * internally as needed.
+ *
+ * @param avctx codec context
+ * @param avpkt output AVPacket.
+ * The user can supply an output buffer by setting
+ * avpkt->data and avpkt->size prior to calling the
+ * function, but if the size of the user-provided data is not
+ * large enough, encoding will fail. If avpkt->data and
+ * avpkt->size are set, avpkt->destruct must also be set. All
+ * other AVPacket fields will be reset by the encoder using
+ * av_init_packet(). If avpkt->data is NULL, the encoder will
+ * allocate it. The encoder will set avpkt->size to the size
+ * of the output packet.
+ *
+ * If this function fails or produces no output, avpkt will be
+ * freed using av_packet_unref().
+ * @param[in] frame AVFrame containing the raw audio data to be encoded.
+ * May be NULL when flushing an encoder that has the
+ * AV_CODEC_CAP_DELAY capability set.
+ * If AV_CODEC_CAP_VARIABLE_FRAME_SIZE is set, then each frame
+ * can have any number of samples.
+ * If it is not set, frame->nb_samples must be equal to
+ * avctx->frame_size for all frames except the last.
+ * The final frame may be smaller than avctx->frame_size.
+ * @param[out] got_packet_ptr This field is set to 1 by libavcodec if the
+ * output packet is non-empty, and to 0 if it is
+ * empty. If the function returns an error, the
+ * packet can be assumed to be invalid, and the
+ * value of got_packet_ptr is undefined and should
+ * not be used.
+ * @return 0 on success, negative error code on failure
+ *
+ * @deprecated use avcodec_send_frame()/avcodec_receive_packet() instead
+ */
+attribute_deprecated
+int avcodec_encode_audio2(AVCodecContext *avctx, AVPacket *avpkt,
+ const AVFrame *frame, int *got_packet_ptr);
+
+/**
+ * Encode a frame of video.
+ *
+ * Takes input raw video data from frame and writes the next output packet, if
+ * available, to avpkt. The output packet does not necessarily contain data for
+ * the most recent frame, as encoders can delay and reorder input frames
+ * internally as needed.
+ *
+ * @param avctx codec context
+ * @param avpkt output AVPacket.
+ * The user can supply an output buffer by setting
+ * avpkt->data and avpkt->size prior to calling the
+ * function, but if the size of the user-provided data is not
+ * large enough, encoding will fail. All other AVPacket fields
+ * will be reset by the encoder using av_init_packet(). If
+ * avpkt->data is NULL, the encoder will allocate it.
+ * The encoder will set avpkt->size to the size of the
+ * output packet. The returned data (if any) belongs to the
+ * caller, he is responsible for freeing it.
+ *
+ * If this function fails or produces no output, avpkt will be
+ * freed using av_packet_unref().
+ * @param[in] frame AVFrame containing the raw video data to be encoded.
+ * May be NULL when flushing an encoder that has the
+ * AV_CODEC_CAP_DELAY capability set.
+ * @param[out] got_packet_ptr This field is set to 1 by libavcodec if the
+ * output packet is non-empty, and to 0 if it is
+ * empty. If the function returns an error, the
+ * packet can be assumed to be invalid, and the
+ * value of got_packet_ptr is undefined and should
+ * not be used.
+ * @return 0 on success, negative error code on failure
+ *
+ * @deprecated use avcodec_send_frame()/avcodec_receive_packet() instead
+ */
+attribute_deprecated
+int avcodec_encode_video2(AVCodecContext *avctx, AVPacket *avpkt,
+ const AVFrame *frame, int *got_packet_ptr);
+
+int avcodec_encode_subtitle(AVCodecContext *avctx, uint8_t *buf, int buf_size,
+ const AVSubtitle *sub);
+
+
+/**
+ * @}
+ */
+
+#if FF_API_AVCODEC_RESAMPLE
+/**
+ * @defgroup lavc_resample Audio resampling
+ * @ingroup libavc
+ * @deprecated use libswresample instead
+ *
+ * @{
+ */
+struct ReSampleContext;
+struct AVResampleContext;
+
+typedef struct ReSampleContext ReSampleContext;
+
+/**
+ * Initialize audio resampling context.
+ *
+ * @param output_channels number of output channels
+ * @param input_channels number of input channels
+ * @param output_rate output sample rate
+ * @param input_rate input sample rate
+ * @param sample_fmt_out requested output sample format
+ * @param sample_fmt_in input sample format
+ * @param filter_length length of each FIR filter in the filterbank relative to the cutoff frequency
+ * @param log2_phase_count log2 of the number of entries in the polyphase filterbank
+ * @param linear if 1 then the used FIR filter will be linearly interpolated
+ *                 between the 2 closest, if 0 the closest will be used
+ * @param cutoff cutoff frequency, 1.0 corresponds to half the output sampling rate
+ * @return allocated ReSampleContext, NULL if error occurred
+ */
+attribute_deprecated
+ReSampleContext *av_audio_resample_init(int output_channels, int input_channels,
+ int output_rate, int input_rate,
+ enum AVSampleFormat sample_fmt_out,
+ enum AVSampleFormat sample_fmt_in,
+ int filter_length, int log2_phase_count,
+ int linear, double cutoff);
+
+attribute_deprecated
+int audio_resample(ReSampleContext *s, short *output, short *input, int nb_samples);
+
+/**
+ * Free resample context.
+ *
+ * @param s a non-NULL pointer to a resample context previously
+ * created with av_audio_resample_init()
+ */
+attribute_deprecated
+void audio_resample_close(ReSampleContext *s);
+
+
+/**
+ * Initialize an audio resampler.
+ * Note, if either rate is not an integer then simply scale both rates up so they are.
+ * @param filter_length length of each FIR filter in the filterbank relative to the cutoff freq
+ * @param log2_phase_count log2 of the number of entries in the polyphase filterbank
+ * @param linear If 1 then the used FIR filter will be linearly interpolated
+ *                 between the 2 closest, if 0 the closest will be used
+ * @param cutoff cutoff frequency, 1.0 corresponds to half the output sampling rate
+ */
+attribute_deprecated
+struct AVResampleContext *av_resample_init(int out_rate, int in_rate, int filter_length, int log2_phase_count, int linear, double cutoff);
+
+/**
+ * Resample an array of samples using a previously configured context.
+ * @param src an array of unconsumed samples
+ * @param consumed the number of samples of src which have been consumed are returned here
+ * @param src_size the number of unconsumed samples available
+ * @param dst_size the amount of space in samples available in dst
+ * @param update_ctx If this is 0 then the context will not be modified, that way several channels can be resampled with the same context.
+ * @return the number of samples written in dst or -1 if an error occurred
+ */
+attribute_deprecated
+int av_resample(struct AVResampleContext *c, short *dst, short *src, int *consumed, int src_size, int dst_size, int update_ctx);
+
+
+/**
+ * Compensate samplerate/timestamp drift. The compensation is done by changing
+ * the resampler parameters, so no audible clicks or similar distortions occur
+ * @param compensation_distance distance in output samples over which the compensation should be performed
+ * @param sample_delta number of output samples which should be output less
+ *
+ * example: av_resample_compensate(c, 10, 500)
+ * here instead of 510 samples only 500 samples would be output
+ *
+ * note, due to rounding the actual compensation might be slightly different,
+ * especially if the compensation_distance is large and the in_rate used during init is small
+ */
+attribute_deprecated
+void av_resample_compensate(struct AVResampleContext *c, int sample_delta, int compensation_distance);
+attribute_deprecated
+void av_resample_close(struct AVResampleContext *c);
+
+/**
+ * @}
+ */
+#endif
+
+#if FF_API_AVPICTURE
+/**
+ * @addtogroup lavc_picture
+ * @{
+ */
+
+/**
+ * @deprecated unused
+ */
+attribute_deprecated
+int avpicture_alloc(AVPicture *picture, enum AVPixelFormat pix_fmt, int width, int height);
+
+/**
+ * @deprecated unused
+ */
+attribute_deprecated
+void avpicture_free(AVPicture *picture);
+
+/**
+ * @deprecated use av_image_fill_arrays() instead.
+ */
+attribute_deprecated
+int avpicture_fill(AVPicture *picture, const uint8_t *ptr,
+ enum AVPixelFormat pix_fmt, int width, int height);
+
+/**
+ * @deprecated use av_image_copy_to_buffer() instead.
+ */
+attribute_deprecated
+int avpicture_layout(const AVPicture *src, enum AVPixelFormat pix_fmt,
+ int width, int height,
+ unsigned char *dest, int dest_size);
+
+/**
+ * @deprecated use av_image_get_buffer_size() instead.
+ */
+attribute_deprecated
+int avpicture_get_size(enum AVPixelFormat pix_fmt, int width, int height);
+
+/**
+ * @deprecated use av_image_copy() instead.
+ */
+attribute_deprecated
+void av_picture_copy(AVPicture *dst, const AVPicture *src,
+ enum AVPixelFormat pix_fmt, int width, int height);
+
+/**
+ * @deprecated unused
+ */
+attribute_deprecated
+int av_picture_crop(AVPicture *dst, const AVPicture *src,
+ enum AVPixelFormat pix_fmt, int top_band, int left_band);
+
+/**
+ * @deprecated unused
+ */
+attribute_deprecated
+int av_picture_pad(AVPicture *dst, const AVPicture *src, int height, int width, enum AVPixelFormat pix_fmt,
+ int padtop, int padbottom, int padleft, int padright, int *color);
+
+/**
+ * @}
+ */
+#endif
+
+/**
+ * @defgroup lavc_misc Utility functions
+ * @ingroup libavc
+ *
+ * Miscellaneous utility functions related to both encoding and decoding
+ * (or neither).
+ * @{
+ */
+
+/**
+ * @defgroup lavc_misc_pixfmt Pixel formats
+ *
+ * Functions for working with pixel formats.
+ * @{
+ */
+
+/**
+ * Utility function to access log2_chroma_w log2_chroma_h from
+ * the pixel format AVPixFmtDescriptor.
+ *
+ * This function asserts that pix_fmt is valid. See av_pix_fmt_get_chroma_sub_sample
+ * for one that returns a failure code and continues in case of invalid
+ * pix_fmts.
+ *
+ * @param[in] pix_fmt the pixel format
+ * @param[out] h_shift store log2_chroma_w
+ * @param[out] v_shift store log2_chroma_h
+ *
+ * @see av_pix_fmt_get_chroma_sub_sample
+ */
+
+void avcodec_get_chroma_sub_sample(enum AVPixelFormat pix_fmt, int *h_shift, int *v_shift);
+
+/**
+ * Return a value representing the fourCC code associated to the
+ * pixel format pix_fmt, or 0 if no associated fourCC code can be
+ * found.
+ */
+unsigned int avcodec_pix_fmt_to_codec_tag(enum AVPixelFormat pix_fmt);
+
+/**
+ * @deprecated see av_get_pix_fmt_loss()
+ */
+int avcodec_get_pix_fmt_loss(enum AVPixelFormat dst_pix_fmt, enum AVPixelFormat src_pix_fmt,
+ int has_alpha);
+
+/**
+ * Find the best pixel format to convert to given a certain source pixel
+ * format. When converting from one pixel format to another, information loss
+ * may occur. For example, when converting from RGB24 to GRAY, the color
+ * information will be lost. Similarly, other losses occur when converting from
+ * some formats to other formats. avcodec_find_best_pix_fmt_of_2() searches which of
+ * the given pixel formats should be used to suffer the least amount of loss.
+ * The pixel formats from which it chooses one, are determined by the
+ * pix_fmt_list parameter.
+ *
+ *
+ * @param[in] pix_fmt_list AV_PIX_FMT_NONE terminated array of pixel formats to choose from
+ * @param[in] src_pix_fmt source pixel format
+ * @param[in] has_alpha Whether the source pixel format alpha channel is used.
+ * @param[out] loss_ptr Combination of flags informing you what kind of losses will occur.
+ * @return The best pixel format to convert to or -1 if none was found.
+ */
+enum AVPixelFormat avcodec_find_best_pix_fmt_of_list(const enum AVPixelFormat *pix_fmt_list,
+ enum AVPixelFormat src_pix_fmt,
+ int has_alpha, int *loss_ptr);
+
+/**
+ * @deprecated see av_find_best_pix_fmt_of_2()
+ */
+enum AVPixelFormat avcodec_find_best_pix_fmt_of_2(enum AVPixelFormat dst_pix_fmt1, enum AVPixelFormat dst_pix_fmt2,
+ enum AVPixelFormat src_pix_fmt, int has_alpha, int *loss_ptr);
+
+attribute_deprecated
+#if AV_HAVE_INCOMPATIBLE_LIBAV_ABI
+enum AVPixelFormat avcodec_find_best_pix_fmt2(const enum AVPixelFormat *pix_fmt_list,
+ enum AVPixelFormat src_pix_fmt,
+ int has_alpha, int *loss_ptr);
+#else
+enum AVPixelFormat avcodec_find_best_pix_fmt2(enum AVPixelFormat dst_pix_fmt1, enum AVPixelFormat dst_pix_fmt2,
+ enum AVPixelFormat src_pix_fmt, int has_alpha, int *loss_ptr);
+#endif
+
+
+enum AVPixelFormat avcodec_default_get_format(struct AVCodecContext *s, const enum AVPixelFormat * fmt);
+
+/**
+ * @}
+ */
+
+#if FF_API_SET_DIMENSIONS
+/**
+ * @deprecated this function is not supposed to be used from outside of lavc
+ */
+attribute_deprecated
+void avcodec_set_dimensions(AVCodecContext *s, int width, int height);
+#endif
+
+/**
+ * Put a string representing the codec tag codec_tag in buf.
+ *
+ * @param buf buffer to place codec tag in
+ * @param buf_size size in bytes of buf
+ * @param codec_tag codec tag to assign
+ * @return the length of the string that would have been generated if
+ * enough space had been available, excluding the trailing null
+ */
+size_t av_get_codec_tag_string(char *buf, size_t buf_size, unsigned int codec_tag);
+
+void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode);
+
+/**
+ * Return a name for the specified profile, if available.
+ *
+ * @param codec the codec that is searched for the given profile
+ * @param profile the profile value for which a name is requested
+ * @return A name for the profile if found, NULL otherwise.
+ */
+const char *av_get_profile_name(const AVCodec *codec, int profile);
+
+/**
+ * Return a name for the specified profile, if available.
+ *
+ * @param codec_id the ID of the codec to which the requested profile belongs
+ * @param profile the profile value for which a name is requested
+ * @return A name for the profile if found, NULL otherwise.
+ *
+ * @note unlike av_get_profile_name(), which searches a list of profiles
+ * supported by a specific decoder or encoder implementation, this
+ * function searches the list of profiles from the AVCodecDescriptor
+ */
+const char *avcodec_profile_name(enum AVCodecID codec_id, int profile);
+
+int avcodec_default_execute(AVCodecContext *c, int (*func)(AVCodecContext *c2, void *arg2),void *arg, int *ret, int count, int size);
+int avcodec_default_execute2(AVCodecContext *c, int (*func)(AVCodecContext *c2, void *arg2, int, int),void *arg, int *ret, int count);
+//FIXME func typedef
+
+/**
+ * Fill AVFrame audio data and linesize pointers.
+ *
+ * The buffer buf must be a preallocated buffer with a size big enough
+ * to contain the specified samples amount. The filled AVFrame data
+ * pointers will point to this buffer.
+ *
+ * AVFrame extended_data channel pointers are allocated if necessary for
+ * planar audio.
+ *
+ * @param frame the AVFrame
+ * frame->nb_samples must be set prior to calling the
+ * function. This function fills in frame->data,
+ * frame->extended_data, frame->linesize[0].
+ * @param nb_channels channel count
+ * @param sample_fmt sample format
+ * @param buf buffer to use for frame data
+ * @param buf_size size of buffer
+ * @param align plane size sample alignment (0 = default)
+ * @return >=0 on success, negative error code on failure
+ * @todo return the size in bytes required to store the samples in
+ * case of success, at the next libavutil bump
+ */
+int avcodec_fill_audio_frame(AVFrame *frame, int nb_channels,
+ enum AVSampleFormat sample_fmt, const uint8_t *buf,
+ int buf_size, int align);
+
+/**
+ * Reset the internal decoder state / flush internal buffers. Should be called
+ * e.g. when seeking or when switching to a different stream.
+ *
+ * @note when refcounted frames are not used (i.e. avctx->refcounted_frames is 0),
+ * this invalidates the frames previously returned from the decoder. When
+ * refcounted frames are used, the decoder just releases any references it might
+ * keep internally, but the caller's reference remains valid.
+ */
+void avcodec_flush_buffers(AVCodecContext *avctx);
+
+/**
+ * Return codec bits per sample.
+ *
+ * @param[in] codec_id the codec
+ * @return Number of bits per sample or zero if unknown for the given codec.
+ */
+int av_get_bits_per_sample(enum AVCodecID codec_id);
+
+/**
+ * Return the PCM codec associated with a sample format.
+ * @param be endianness, 0 for little, 1 for big,
+ * -1 (or anything else) for native
+ * @return AV_CODEC_ID_PCM_* or AV_CODEC_ID_NONE
+ */
+enum AVCodecID av_get_pcm_codec(enum AVSampleFormat fmt, int be);
+
+/**
+ * Return codec bits per sample.
+ * Only return non-zero if the bits per sample is exactly correct, not an
+ * approximation.
+ *
+ * @param[in] codec_id the codec
+ * @return Number of bits per sample or zero if unknown for the given codec.
+ */
+int av_get_exact_bits_per_sample(enum AVCodecID codec_id);
+
+/**
+ * Return audio frame duration.
+ *
+ * @param avctx codec context
+ * @param frame_bytes size of the frame, or 0 if unknown
+ * @return frame duration, in samples, if known. 0 if not able to
+ * determine.
+ */
+int av_get_audio_frame_duration(AVCodecContext *avctx, int frame_bytes);
+
+/**
+ * This function is the same as av_get_audio_frame_duration(), except it works
+ * with AVCodecParameters instead of an AVCodecContext.
+ */
+int av_get_audio_frame_duration2(AVCodecParameters *par, int frame_bytes);
+
+#if FF_API_OLD_BSF
+typedef struct AVBitStreamFilterContext {
+ void *priv_data;
+ struct AVBitStreamFilter *filter;
+ AVCodecParserContext *parser;
+ struct AVBitStreamFilterContext *next;
+ /**
+ * Internal default arguments, used if NULL is passed to av_bitstream_filter_filter().
+ * Not for access by library users.
+ */
+ char *args;
+} AVBitStreamFilterContext;
+#endif
+
+typedef struct AVBSFInternal AVBSFInternal;
+
+/**
+ * The bitstream filter state.
+ *
+ * This struct must be allocated with av_bsf_alloc() and freed with
+ * av_bsf_free().
+ *
+ * The fields in the struct will only be changed (by the caller or by the
+ * filter) as described in their documentation, and are to be considered
+ * immutable otherwise.
+ */
+typedef struct AVBSFContext {
+ /**
+ * A class for logging and AVOptions
+ */
+ const AVClass *av_class;
+
+ /**
+ * The bitstream filter this context is an instance of.
+ */
+ const struct AVBitStreamFilter *filter;
+
+ /**
+ * Opaque libavcodec internal data. Must not be touched by the caller in any
+ * way.
+ */
+ AVBSFInternal *internal;
+
+ /**
+ * Opaque filter-specific private data. If filter->priv_class is non-NULL,
+ * this is an AVOptions-enabled struct.
+ */
+ void *priv_data;
+
+ /**
+ * Parameters of the input stream. Set by the caller before av_bsf_init().
+ */
+ AVCodecParameters *par_in;
+
+ /**
+ * Parameters of the output stream. Set by the filter in av_bsf_init().
+ */
+ AVCodecParameters *par_out;
+
+ /**
+ * The timebase used for the timestamps of the input packets. Set by the
+ * caller before av_bsf_init().
+ */
+ AVRational time_base_in;
+
+ /**
+ * The timebase used for the timestamps of the output packets. Set by the
+ * filter in av_bsf_init().
+ */
+ AVRational time_base_out;
+} AVBSFContext;
+
+typedef struct AVBitStreamFilter {
+    const char *name;
+
+    /**
+     * A list of codec ids supported by the filter, terminated by
+     * AV_CODEC_ID_NONE.
+     * May be NULL, in that case the bitstream filter works with any codec id.
+     */
+    const enum AVCodecID *codec_ids;
+
+    /**
+     * A class for the private data, used to declare bitstream filter private
+     * AVOptions. This field is NULL for bitstream filters that do not declare
+     * any options.
+     *
+     * If this field is non-NULL, the first member of the filter private data
+     * must be a pointer to AVClass, which will be set by libavcodec generic
+     * code to this class.
+     */
+    const AVClass *priv_class;
+
+    /*****************************************************************
+     * No fields below this line are part of the public API. They
+     * may not be used outside of libavcodec and can be changed and
+     * removed at will.
+     * New public fields should be added right above.
+     *****************************************************************
+     */
+
+    int priv_data_size;               ///< size of AVBSFContext.priv_data to allocate — TODO confirm
+    int (*init)(AVBSFContext *ctx);   ///< initialize the filter — presumably invoked by av_bsf_init(); confirm
+    int (*filter)(AVBSFContext *ctx, AVPacket *pkt); ///< filter one packet
+    void (*close)(AVBSFContext *ctx); ///< free filter-private state
+} AVBitStreamFilter;
+
+#if FF_API_OLD_BSF
+/**
+ * Register a bitstream filter.
+ *
+ * The filter will be accessible to the application code through
+ * av_bitstream_filter_next() or can be directly initialized with
+ * av_bitstream_filter_init().
+ *
+ * @see avcodec_register_all()
+ */
+attribute_deprecated
+void av_register_bitstream_filter(AVBitStreamFilter *bsf);
+
+/**
+ * Create and initialize a bitstream filter context given a bitstream
+ * filter name.
+ *
+ * The returned context must be freed with av_bitstream_filter_close().
+ *
+ * @param name the name of the bitstream filter
+ * @return a bitstream filter context if a matching filter was found
+ * and successfully initialized, NULL otherwise
+ */
+attribute_deprecated
+AVBitStreamFilterContext *av_bitstream_filter_init(const char *name);
+
+/**
+ * Filter bitstream.
+ *
+ * This function filters the buffer buf with size buf_size, and places the
+ * filtered buffer in the buffer pointed to by poutbuf.
+ *
+ * The output buffer must be freed by the caller.
+ *
+ * @param bsfc bitstream filter context created by av_bitstream_filter_init()
+ * @param avctx AVCodecContext accessed by the filter, may be NULL.
+ * If specified, this must point to the encoder context of the
+ * output stream the packet is sent to.
+ * @param args arguments which specify the filter configuration, may be NULL
+ * @param poutbuf pointer which is updated to point to the filtered buffer
+ * @param poutbuf_size pointer which is updated to the filtered buffer size in bytes
+ * @param buf buffer containing the data to filter
+ * @param buf_size size in bytes of buf
+ * @param keyframe set to non-zero if the buffer to filter corresponds to a key-frame packet data
+ * @return >= 0 in case of success, or a negative error code in case of failure
+ *
+ * If the return value is positive, an output buffer is allocated and
+ * is available in *poutbuf, and is distinct from the input buffer.
+ *
+ * If the return value is 0, the output buffer is not allocated and
+ * should be considered identical to the input buffer, or in case
+ * *poutbuf was set it points to the input buffer (not necessarily to
+ * its starting address). A special case is if *poutbuf was set to NULL and
+ * *poutbuf_size was set to 0, which indicates the packet should be dropped.
+ */
+attribute_deprecated
+int av_bitstream_filter_filter(AVBitStreamFilterContext *bsfc,
+ AVCodecContext *avctx, const char *args,
+ uint8_t **poutbuf, int *poutbuf_size,
+ const uint8_t *buf, int buf_size, int keyframe);
+
+/**
+ * Release bitstream filter context.
+ *
+ * @param bsf the bitstream filter context created with
+ * av_bitstream_filter_init(), can be NULL
+ */
+attribute_deprecated
+void av_bitstream_filter_close(AVBitStreamFilterContext *bsf);
+
+/**
+ * If f is NULL, return the first registered bitstream filter,
+ * if f is non-NULL, return the next registered bitstream filter
+ * after f, or NULL if f is the last one.
+ *
+ * This function can be used to iterate over all registered bitstream
+ * filters.
+ */
+attribute_deprecated
+AVBitStreamFilter *av_bitstream_filter_next(const AVBitStreamFilter *f);
+#endif
+
+/**
+ * @return a bitstream filter with the specified name or NULL if no such
+ * bitstream filter exists.
+ */
+const AVBitStreamFilter *av_bsf_get_by_name(const char *name);
+
+/**
+ * Iterate over all registered bitstream filters.
+ *
+ * @param opaque a pointer where libavcodec will store the iteration state. Must
+ * point to NULL to start the iteration.
+ *
+ * @return the next registered bitstream filter or NULL when the iteration is
+ * finished
+ */
+const AVBitStreamFilter *av_bsf_next(void **opaque);
+
+/**
+ * Allocate a context for a given bitstream filter. The caller must fill in the
+ * context parameters as described in the documentation and then call
+ * av_bsf_init() before sending any data to the filter.
+ *
+ * @param filter the filter for which to allocate an instance.
+ * @param ctx a pointer into which the pointer to the newly-allocated context
+ * will be written. It must be freed with av_bsf_free() after the
+ * filtering is done.
+ *
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+int av_bsf_alloc(const AVBitStreamFilter *filter, AVBSFContext **ctx);
+
+/**
+ * Prepare the filter for use, after all the parameters and options have been
+ * set.
+ */
+int av_bsf_init(AVBSFContext *ctx);
+
+/**
+ * Submit a packet for filtering.
+ *
+ * After sending each packet, the filter must be completely drained by calling
+ * av_bsf_receive_packet() repeatedly until it returns AVERROR(EAGAIN) or
+ * AVERROR_EOF.
+ *
+ * @param pkt the packet to filter. The bitstream filter will take ownership of
+ * the packet and reset the contents of pkt. pkt is not touched if an error occurs.
+ * This parameter may be NULL, which signals the end of the stream (i.e. no more
+ * packets will be sent). That will cause the filter to output any packets it
+ * may have buffered internally.
+ *
+ * @return 0 on success, a negative AVERROR on error.
+ */
+int av_bsf_send_packet(AVBSFContext *ctx, AVPacket *pkt);
+
+/**
+ * Retrieve a filtered packet.
+ *
+ * @param[out] pkt this struct will be filled with the contents of the filtered
+ * packet. It is owned by the caller and must be freed using
+ * av_packet_unref() when it is no longer needed.
+ * This parameter should be "clean" (i.e. freshly allocated
+ * with av_packet_alloc() or unreffed with av_packet_unref())
+ * when this function is called. If this function returns
+ * successfully, the contents of pkt will be completely
+ * overwritten by the returned data. On failure, pkt is not
+ * touched.
+ *
+ * @return 0 on success. AVERROR(EAGAIN) if more packets need to be sent to the
+ * filter (using av_bsf_send_packet()) to get more output. AVERROR_EOF if there
+ * will be no further output from the filter. Another negative AVERROR value if
+ * an error occurs.
+ *
+ * @note one input packet may result in several output packets, so after sending
+ * a packet with av_bsf_send_packet(), this function needs to be called
+ * repeatedly until it stops returning 0. It is also possible for a filter to
+ * output fewer packets than were sent to it, so this function may return
+ * AVERROR(EAGAIN) immediately after a successful av_bsf_send_packet() call.
+ */
+int av_bsf_receive_packet(AVBSFContext *ctx, AVPacket *pkt);
+
+/**
+ * Free a bitstream filter context and everything associated with it; write NULL
+ * into the supplied pointer.
+ */
+void av_bsf_free(AVBSFContext **ctx);
+
+/**
+ * Get the AVClass for AVBSFContext. It can be used in combination with
+ * AV_OPT_SEARCH_FAKE_OBJ for examining options.
+ *
+ * @see av_opt_find().
+ */
+const AVClass *av_bsf_get_class(void);
+
+/* memory */
+
+/**
+ * Same behaviour as av_fast_malloc(), but the buffer has additional
+ * AV_INPUT_BUFFER_PADDING_SIZE at the end which will always be 0.
+ *
+ * In addition the whole buffer will initially and after resizes
+ * be 0-initialized so that no uninitialized data will ever appear.
+ */
+void av_fast_padded_malloc(void *ptr, unsigned int *size, size_t min_size);
+
+/**
+ * Same behaviour as av_fast_padded_malloc(), except that the buffer will always
+ * be 0-initialized after call.
+ */
+void av_fast_padded_mallocz(void *ptr, unsigned int *size, size_t min_size);
+
+/**
+ * Encode extradata length to a buffer. Used by xiph codecs.
+ *
+ * @param s buffer to write to; must be at least (v/255+1) bytes long
+ * @param v size of extradata in bytes
+ * @return number of bytes written to the buffer.
+ */
+unsigned int av_xiphlacing(unsigned char *s, unsigned int v);
+
+#if FF_API_MISSING_SAMPLE
+/**
+ * Log a generic warning message about a missing feature. This function is
+ * intended to be used internally by FFmpeg (libavcodec, libavformat, etc.)
+ * only, and would normally not be used by applications.
+ * @param[in] avc a pointer to an arbitrary struct of which the first field is
+ * a pointer to an AVClass struct
+ * @param[in] feature string containing the name of the missing feature
+ * @param[in] want_sample indicates if samples are wanted which exhibit this feature.
+ * If want_sample is non-zero, additional verbiage will be added to the log
+ * message which tells the user how to report samples to the development
+ * mailing list.
+ * @deprecated Use avpriv_report_missing_feature() instead.
+ */
+attribute_deprecated
+void av_log_missing_feature(void *avc, const char *feature, int want_sample);
+
+/**
+ * Log a generic warning message asking for a sample. This function is
+ * intended to be used internally by FFmpeg (libavcodec, libavformat, etc.)
+ * only, and would normally not be used by applications.
+ * @param[in] avc a pointer to an arbitrary struct of which the first field is
+ * a pointer to an AVClass struct
+ * @param[in] msg string containing an optional message, or NULL if no message
+ * @deprecated Use avpriv_request_sample() instead.
+ */
+attribute_deprecated
+void av_log_ask_for_sample(void *avc, const char *msg, ...) av_printf_format(2, 3);
+#endif /* FF_API_MISSING_SAMPLE */
+
+/**
+ * Register the hardware accelerator hwaccel.
+ */
+void av_register_hwaccel(AVHWAccel *hwaccel);
+
+/**
+ * If hwaccel is NULL, returns the first registered hardware accelerator,
+ * if hwaccel is non-NULL, returns the next registered hardware accelerator
+ * after hwaccel, or NULL if hwaccel is the last one.
+ */
+AVHWAccel *av_hwaccel_next(const AVHWAccel *hwaccel);
+
+
+/**
+ * Lock operation used by lockmgr
+ */
+enum AVLockOp {
+ AV_LOCK_CREATE, ///< Create a mutex
+ AV_LOCK_OBTAIN, ///< Lock the mutex
+ AV_LOCK_RELEASE, ///< Unlock the mutex
+ AV_LOCK_DESTROY, ///< Free mutex resources
+};
+
+/**
+ * Register a user provided lock manager supporting the operations
+ * specified by AVLockOp. The "mutex" argument to the function points
+ * to a (void *) where the lockmgr should store/get a pointer to a user
+ * allocated mutex. It is NULL upon AV_LOCK_CREATE and equal to the
+ * value left by the last call for all other ops. If the lock manager is
+ * unable to perform the op then it should leave the mutex in the same
+ * state as when it was called and return a non-zero value. However,
+ * when called with AV_LOCK_DESTROY the mutex will always be assumed to
+ * have been successfully destroyed. If av_lockmgr_register succeeds
+ * it will return a non-negative value, if it fails it will return a
+ * negative value, destroy all mutexes and unregister all callbacks.
+ * av_lockmgr_register is not thread-safe, it must be called from a
+ * single thread before any calls which make use of locking are used.
+ *
+ * @param cb User defined callback. av_lockmgr_register invokes calls
+ * to this callback and the previously registered callback.
+ * The callback will be used to create more than one mutex
+ * each of which must be backed by its own underlying locking
+ * mechanism (i.e. do not use a single static object to
+ * implement your lock manager). If cb is set to NULL the
+ * lockmgr will be unregistered.
+ */
+int av_lockmgr_register(int (*cb)(void **mutex, enum AVLockOp op));
+
+/**
+ * Get the type of the given codec.
+ */
+enum AVMediaType avcodec_get_type(enum AVCodecID codec_id);
+
+/**
+ * Get the name of a codec.
+ * @return a static string identifying the codec; never NULL
+ */
+const char *avcodec_get_name(enum AVCodecID id);
+
+/**
+ * @return a positive value if s is open (i.e. avcodec_open2() was called on it
+ * with no corresponding avcodec_close()), 0 otherwise.
+ */
+int avcodec_is_open(AVCodecContext *s);
+
+/**
+ * @return a non-zero number if codec is an encoder, zero otherwise
+ */
+int av_codec_is_encoder(const AVCodec *codec);
+
+/**
+ * @return a non-zero number if codec is a decoder, zero otherwise
+ */
+int av_codec_is_decoder(const AVCodec *codec);
+
+/**
+ * @return descriptor for given codec ID or NULL if no descriptor exists.
+ */
+const AVCodecDescriptor *avcodec_descriptor_get(enum AVCodecID id);
+
+/**
+ * Iterate over all codec descriptors known to libavcodec.
+ *
+ * @param prev previous descriptor. NULL to get the first descriptor.
+ *
+ * @return next descriptor or NULL after the last descriptor
+ */
+const AVCodecDescriptor *avcodec_descriptor_next(const AVCodecDescriptor *prev);
+
+/**
+ * @return codec descriptor with the given name or NULL if no such descriptor
+ * exists.
+ */
+const AVCodecDescriptor *avcodec_descriptor_get_by_name(const char *name);
+
+/**
+ * Allocate a CPB properties structure and initialize its fields to default
+ * values.
+ *
+ * @param size if non-NULL, the size of the allocated struct will be written
+ * here. This is useful for embedding it in side data.
+ *
+ * @return the newly allocated struct or NULL on failure
+ */
+AVCPBProperties *av_cpb_properties_alloc(size_t *size);
+
+/**
+ * @}
+ */
+
+#endif /* AVCODEC_AVCODEC_H */
diff --git a/media/ffvpx/libavcodec/avcodec.symbols b/media/ffvpx/libavcodec/avcodec.symbols
new file mode 100644
index 000000000..cf507f722
--- /dev/null
+++ b/media/ffvpx/libavcodec/avcodec.symbols
@@ -0,0 +1,129 @@
+av_bitstream_filter_close
+av_bitstream_filter_filter
+av_bitstream_filter_init
+av_bitstream_filter_next
+av_codec_ffversion
+av_codec_get_chroma_intra_matrix
+av_codec_get_codec_descriptor
+av_codec_get_codec_properties
+av_codec_get_lowres
+av_codec_get_max_lowres
+av_codec_get_pkt_timebase
+av_codec_get_seek_preroll
+av_codec_is_decoder
+av_codec_is_encoder
+av_codec_next
+av_codec_set_chroma_intra_matrix
+av_codec_set_codec_descriptor
+av_codec_set_lowres
+av_codec_set_pkt_timebase
+av_codec_set_seek_preroll
+av_copy_packet
+av_copy_packet_side_data
+av_dup_packet
+av_fast_padded_malloc
+av_fast_padded_mallocz
+av_free_packet
+av_get_audio_frame_duration
+av_get_bits_per_sample
+av_get_codec_tag_string
+av_get_exact_bits_per_sample
+av_get_pcm_codec
+av_get_profile_name
+av_grow_packet
+av_hwaccel_next
+av_init_packet
+av_lockmgr_register
+av_log_ask_for_sample
+av_log_missing_feature
+av_new_packet
+av_packet_copy_props
+av_packet_free_side_data
+av_packet_from_data
+av_packet_get_side_data
+av_packet_merge_side_data
+av_packet_move_ref
+av_packet_new_side_data
+av_packet_pack_dictionary
+av_packet_ref
+av_packet_rescale_ts
+av_packet_shrink_side_data
+av_packet_side_data_name
+av_packet_split_side_data
+av_packet_unpack_dictionary
+av_packet_unref
+av_parser_change
+av_parser_close
+av_parser_init
+av_parser_next
+av_parser_parse2
+av_picture_copy
+av_picture_crop
+av_picture_pad
+av_qsv_alloc_context
+av_register_bitstream_filter
+av_register_codec_parser
+av_register_hwaccel
+av_resample
+av_resample_close
+av_resample_compensate
+av_resample_init
+av_shrink_packet
+av_vorbis_parse_frame
+av_vorbis_parse_frame_flags
+av_vorbis_parse_free
+av_vorbis_parse_init
+av_vorbis_parse_reset
+av_xiphlacing
+avcodec_align_dimensions
+avcodec_align_dimensions2
+avcodec_alloc_context3
+avcodec_chroma_pos_to_enum
+avcodec_close
+avcodec_configuration
+avcodec_copy_context
+avcodec_decode_audio4
+avcodec_decode_subtitle2
+avcodec_decode_video2
+avcodec_default_execute
+avcodec_default_execute2
+avcodec_default_get_buffer2
+avcodec_default_get_format
+avcodec_descriptor_get
+avcodec_descriptor_get_by_name
+avcodec_descriptor_next
+avcodec_enum_to_chroma_pos
+avcodec_fill_audio_frame
+avcodec_find_best_pix_fmt2
+avcodec_find_best_pix_fmt_of_2
+avcodec_find_best_pix_fmt_of_list
+avcodec_find_decoder
+avcodec_find_decoder_by_name
+avcodec_find_encoder
+avcodec_find_encoder_by_name
+avcodec_flush_buffers
+avcodec_free_context
+avcodec_get_chroma_sub_sample
+avcodec_get_class
+avcodec_get_context_defaults3
+avcodec_get_edge_width
+avcodec_get_frame_class
+avcodec_get_name
+avcodec_get_pix_fmt_loss
+avcodec_get_subtitle_rect_class
+avcodec_get_type
+avcodec_is_open
+avcodec_license
+avcodec_open2
+avcodec_pix_fmt_to_codec_tag
+avcodec_register
+avcodec_register_all
+avcodec_set_dimensions
+avcodec_string
+avcodec_version
+avpicture_alloc
+avpicture_fill
+avpicture_free
+avpicture_get_size
+avpicture_layout
+avsubtitle_free
diff --git a/media/ffvpx/libavcodec/avdct.h b/media/ffvpx/libavcodec/avdct.h
new file mode 100644
index 000000000..272422e44
--- /dev/null
+++ b/media/ffvpx/libavcodec/avdct.h
@@ -0,0 +1,84 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AVDCT_H
+#define AVCODEC_AVDCT_H
+
+#include "libavutil/opt.h"
+
+/**
+ * AVDCT context.
+ * @note function pointers can be NULL if the specific features have been
+ * disabled at build time.
+ */
typedef struct AVDCT {
    /** AVOptions class descriptor; must stay the first field. */
    const AVClass *av_class;

    /** Inverse DCT performed in place on a 16-byte-aligned 8x8 block. */
    void (*idct)(int16_t *block /* align 16 */);

    /**
     * IDCT input permutation.
     * Several optimized IDCTs need a permutated input (relative to the
     * normal order of the reference IDCT).
     * This permutation must be performed before the idct_put/add.
     * Note, normally this can be merged with the zigzag/alternate scan<br>
     * An example to avoid confusion:
     * - (->decode coeffs -> zigzag reorder -> dequant -> reference IDCT -> ...)
     * - (x -> reference DCT -> reference IDCT -> x)
     * - (x -> reference DCT -> simple_mmx_perm = idct_permutation
     *    -> simple_idct_mmx -> x)
     * - (-> decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant
     *    -> simple_idct_mmx -> ...)
     */
    uint8_t idct_permutation[64];

    /** Forward DCT performed in place on a 16-byte-aligned 8x8 block. */
    void (*fdct)(int16_t *block /* align 16 */);


    /**
     * DCT algorithm.
     * must use AVOptions to set this field.
     */
    int dct_algo;

    /**
     * IDCT algorithm.
     * must use AVOptions to set this field.
     */
    int idct_algo;

    /** Read an 8x8 block of pixels (line_size bytes apart per row) into
     * a 16-byte-aligned array of 16-bit coefficients. */
    void (*get_pixels)(int16_t *block /* align 16 */,
                       const uint8_t *pixels /* align 8 */,
                       ptrdiff_t line_size);

    /* Bits per pixel sample; set via AVOptions like the *_algo fields. */
    int bits_per_sample;
} AVDCT;
+
+/**
+ * Allocates an AVDCT context.
+ * This needs to be initialized with avcodec_dct_init() after optionally
+ * configuring it with AVOptions.
+ *
+ * To free it use av_free()
+ */
+AVDCT *avcodec_dct_alloc(void);
+int avcodec_dct_init(AVDCT *);
+
+const AVClass *avcodec_dct_get_class(void);
+
+#endif /* AVCODEC_AVDCT_H */
diff --git a/media/ffvpx/libavcodec/avpacket.c b/media/ffvpx/libavcodec/avpacket.c
new file mode 100644
index 000000000..921868923
--- /dev/null
+++ b/media/ffvpx/libavcodec/avpacket.c
@@ -0,0 +1,664 @@
+/*
+ * AVPacket functions for libavcodec
+ * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <string.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/common.h"
+#include "libavutil/internal.h"
+#include "libavutil/mathematics.h"
+#include "libavutil/mem.h"
+#include "avcodec.h"
+#include "bytestream.h"
+#include "internal.h"
+
+void av_init_packet(AVPacket *pkt)
+{
+ pkt->pts = AV_NOPTS_VALUE;
+ pkt->dts = AV_NOPTS_VALUE;
+ pkt->pos = -1;
+ pkt->duration = 0;
+#if FF_API_CONVERGENCE_DURATION
+FF_DISABLE_DEPRECATION_WARNINGS
+ pkt->convergence_duration = 0;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+ pkt->flags = 0;
+ pkt->stream_index = 0;
+ pkt->buf = NULL;
+ pkt->side_data = NULL;
+ pkt->side_data_elems = 0;
+}
+
+AVPacket *av_packet_alloc(void)
+{
+ AVPacket *pkt = av_mallocz(sizeof(AVPacket));
+ if (!pkt)
+ return pkt;
+
+ av_packet_unref(pkt);
+
+ return pkt;
+}
+
+void av_packet_free(AVPacket **pkt)
+{
+ if (!pkt || !*pkt)
+ return;
+
+ av_packet_unref(*pkt);
+ av_freep(pkt);
+}
+
+static int packet_alloc(AVBufferRef **buf, int size)
+{
+ int ret;
+ if (size < 0 || size >= INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE)
+ return AVERROR(EINVAL);
+
+ ret = av_buffer_realloc(buf, size + AV_INPUT_BUFFER_PADDING_SIZE);
+ if (ret < 0)
+ return ret;
+
+ memset((*buf)->data + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+
+ return 0;
+}
+
+int av_new_packet(AVPacket *pkt, int size)
+{
+ AVBufferRef *buf = NULL;
+ int ret = packet_alloc(&buf, size);
+ if (ret < 0)
+ return ret;
+
+ av_init_packet(pkt);
+ pkt->buf = buf;
+ pkt->data = buf->data;
+ pkt->size = size;
+
+ return 0;
+}
+
+void av_shrink_packet(AVPacket *pkt, int size)
+{
+ if (pkt->size <= size)
+ return;
+ pkt->size = size;
+ memset(pkt->data + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+}
+
/**
 * Grow the packet payload by grow_by bytes, reallocating the underlying
 * buffer when needed and keeping the zeroed trailing padding intact.
 * Returns 0 on success, a negative value on overflow or allocation failure.
 */
int av_grow_packet(AVPacket *pkt, int grow_by)
{
    int new_size;
    av_assert0((unsigned)pkt->size <= INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE);
    /* Reject growth whose padded total would overflow int. */
    if ((unsigned)grow_by >
        INT_MAX - (pkt->size + AV_INPUT_BUFFER_PADDING_SIZE))
        return -1;

    new_size = pkt->size + grow_by + AV_INPUT_BUFFER_PADDING_SIZE;
    if (pkt->buf) {
        size_t data_offset;
        uint8_t *old_data = pkt->data;
        if (pkt->data == NULL) {
            data_offset = 0;
            pkt->data = pkt->buf->data;
        } else {
            /* pkt->data may point into the middle of the refcounted buffer;
             * preserve that offset across the realloc. */
            data_offset = pkt->data - pkt->buf->data;
            if (data_offset > INT_MAX - new_size)
                return -1;
        }

        if (new_size + data_offset > pkt->buf->size) {
            int ret = av_buffer_realloc(&pkt->buf, new_size + data_offset);
            if (ret < 0) {
                /* Restore the original data pointer so pkt stays consistent. */
                pkt->data = old_data;
                return ret;
            }
            pkt->data = pkt->buf->data + data_offset;
        }
    } else {
        /* Packet did not own its payload: allocate a buffer and copy it in. */
        pkt->buf = av_buffer_alloc(new_size);
        if (!pkt->buf)
            return AVERROR(ENOMEM);
        memcpy(pkt->buf->data, pkt->data, pkt->size);
        pkt->data = pkt->buf->data;
    }
    pkt->size += grow_by;
    /* Keep the mandatory zeroed padding after the enlarged payload. */
    memset(pkt->data + pkt->size, 0, AV_INPUT_BUFFER_PADDING_SIZE);

    return 0;
}
+
+int av_packet_from_data(AVPacket *pkt, uint8_t *data, int size)
+{
+ if (size >= INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE)
+ return AVERROR(EINVAL);
+
+ pkt->buf = av_buffer_create(data, size + AV_INPUT_BUFFER_PADDING_SIZE,
+ av_buffer_default_free, NULL, 0);
+ if (!pkt->buf)
+ return AVERROR(ENOMEM);
+
+ pkt->data = data;
+ pkt->size = size;
+
+ return 0;
+}
+
#if FF_API_AVPACKET_OLD_API
FF_DISABLE_DEPRECATION_WARNINGS
/* Allocator used for side-data copies: plain av_malloc(). */
#define ALLOC_MALLOC(data, size) data = av_malloc(size)
/* Allocator that goes through pkt->buf so the payload is refcounted.
 * av_buffer_realloc()'s return value is ignored; failure is detected by
 * the NULL data that results. */
#define ALLOC_BUF(data, size)                \
do {                                         \
    av_buffer_realloc(&pkt->buf, size);      \
    data = pkt->buf ? pkt->buf->data : NULL; \
} while (0)

/* Copy size bytes from src into a fresh allocation assigned to dst, using
 * allocator ALLOC. When padding is non-zero, AV_INPUT_BUFFER_PADDING_SIZE
 * zeroed bytes are appended (guarded against unsigned overflow of the
 * padded size). On any failure this jumps to the enclosing function's
 * failed_alloc label. */
#define DUP_DATA(dst, src, size, padding, ALLOC)                        \
    do {                                                                \
        void *data;                                                     \
        if (padding) {                                                  \
            if ((unsigned)(size) >                                      \
                (unsigned)(size) + AV_INPUT_BUFFER_PADDING_SIZE)        \
                goto failed_alloc;                                      \
            ALLOC(data, size + AV_INPUT_BUFFER_PADDING_SIZE);           \
        } else {                                                        \
            ALLOC(data, size);                                          \
        }                                                               \
        if (!data)                                                      \
            goto failed_alloc;                                          \
        memcpy(data, src, size);                                        \
        if (padding)                                                    \
            memset((uint8_t *)data + size, 0,                           \
                   AV_INPUT_BUFFER_PADDING_SIZE);                       \
        dst = data;                                                     \
    } while (0)
+
/* Makes duplicates of data, side_data, but does not copy any other fields */
/* NOTE(review): callers copy *src into *pkt before calling, so the
 * pkt->buf test below effectively inspects src->buf — confirm before
 * changing. When dup is non-zero, side data is shared (shallow), not
 * duplicated. */
static int copy_packet_data(AVPacket *pkt, const AVPacket *src, int dup)
{
    pkt->data = NULL;
    pkt->side_data = NULL;
    if (pkt->buf) {
        /* Source payload is refcounted: just take another reference. */
        AVBufferRef *ref = av_buffer_ref(src->buf);
        if (!ref)
            return AVERROR(ENOMEM);
        pkt->buf = ref;
        pkt->data = ref->data;
    } else {
        /* Unowned payload: deep-copy it into a padded refcounted buffer. */
        DUP_DATA(pkt->data, src->data, pkt->size, 1, ALLOC_BUF);
    }
    if (pkt->side_data_elems && dup)
        pkt->side_data = src->side_data;
    if (pkt->side_data_elems && !dup) {
        return av_copy_packet_side_data(pkt, src);
    }
    return 0;

failed_alloc:
    av_packet_unref(pkt);
    return AVERROR(ENOMEM);
}
+
/**
 * Deep-copy all side-data elements from src into pkt.
 * Also supports in-place duplication (src == pkt), in which case the
 * element array is not re-zeroed before the per-element copies.
 * On failure pkt is unreferenced and AVERROR(ENOMEM) is returned.
 */
int av_copy_packet_side_data(AVPacket *pkt, const AVPacket *src)
{
    if (src->side_data_elems) {
        int i;
        /* Duplicate the element array itself (no padding needed). */
        DUP_DATA(pkt->side_data, src->side_data,
                 src->side_data_elems * sizeof(*src->side_data), 0, ALLOC_MALLOC);
        if (src != pkt) {
            /* Clear the copies so a failure mid-loop never frees the
             * source's data pointers via av_packet_unref() below. */
            memset(pkt->side_data, 0,
                   src->side_data_elems * sizeof(*src->side_data));
        }
        for (i = 0; i < src->side_data_elems; i++) {
            DUP_DATA(pkt->side_data[i].data, src->side_data[i].data,
                     src->side_data[i].size, 1, ALLOC_MALLOC);
            pkt->side_data[i].size = src->side_data[i].size;
            pkt->side_data[i].type = src->side_data[i].type;
        }
    }
    pkt->side_data_elems = src->side_data_elems;
    return 0;

failed_alloc:
    av_packet_unref(pkt);
    return AVERROR(ENOMEM);
}
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+int av_dup_packet(AVPacket *pkt)
+{
+ AVPacket tmp_pkt;
+
+ if (!pkt->buf && pkt->data) {
+ tmp_pkt = *pkt;
+ return copy_packet_data(pkt, &tmp_pkt, 1);
+ }
+ return 0;
+}
+
/**
 * Copy src into dst: scalar fields are copied wholesale, then the payload
 * and side data are deep-copied (dup == 0) by copy_packet_data().
 */
int av_copy_packet(AVPacket *dst, const AVPacket *src)
{
    *dst = *src;
    return copy_packet_data(dst, src, 0);
}
+
+void av_packet_free_side_data(AVPacket *pkt)
+{
+ int i;
+ for (i = 0; i < pkt->side_data_elems; i++)
+ av_freep(&pkt->side_data[i].data);
+ av_freep(&pkt->side_data);
+ pkt->side_data_elems = 0;
+}
+
#if FF_API_AVPACKET_OLD_API
FF_DISABLE_DEPRECATION_WARNINGS
/**
 * Deprecated predecessor of av_packet_unref(): release the payload buffer
 * and all side data. NULL pkt is a no-op. Other fields are not reset.
 */
void av_free_packet(AVPacket *pkt)
{
    if (!pkt)
        return;

    av_buffer_unref(&pkt->buf); /* no-op when pkt->buf is already NULL */
    pkt->data = NULL;
    pkt->size = 0;

    av_packet_free_side_data(pkt);
}
FF_ENABLE_DEPRECATION_WARNINGS
#endif
+
+int av_packet_add_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
+ uint8_t *data, size_t size)
+{
+ int elems = pkt->side_data_elems;
+
+ if ((unsigned)elems + 1 > INT_MAX / sizeof(*pkt->side_data))
+ return AVERROR(ERANGE);
+
+ pkt->side_data = av_realloc(pkt->side_data,
+ (elems + 1) * sizeof(*pkt->side_data));
+ if (!pkt->side_data)
+ return AVERROR(ENOMEM);
+
+ pkt->side_data[elems].data = data;
+ pkt->side_data[elems].size = size;
+ pkt->side_data[elems].type = type;
+ pkt->side_data_elems++;
+
+ return 0;
+}
+
+
+uint8_t *av_packet_new_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
+ int size)
+{
+ int ret;
+ uint8_t *data;
+
+ if ((unsigned)size > INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE)
+ return NULL;
+ data = av_mallocz(size + AV_INPUT_BUFFER_PADDING_SIZE);
+ if (!data)
+ return NULL;
+
+ ret = av_packet_add_side_data(pkt, type, data, size);
+ if (ret < 0) {
+ av_freep(&data);
+ return NULL;
+ }
+
+ return data;
+}
+
+uint8_t *av_packet_get_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
+ int *size)
+{
+ int i;
+
+ for (i = 0; i < pkt->side_data_elems; i++) {
+ if (pkt->side_data[i].type == type) {
+ if (size)
+ *size = pkt->side_data[i].size;
+ return pkt->side_data[i].data;
+ }
+ }
+ return NULL;
+}
+
/**
 * Map a side-data type to a human-readable name.
 * Returns a static string, or NULL for types not listed here.
 */
const char *av_packet_side_data_name(enum AVPacketSideDataType type)
{
    switch(type) {
    case AV_PKT_DATA_PALETTE:                    return "Palette";
    case AV_PKT_DATA_NEW_EXTRADATA:              return "New Extradata";
    case AV_PKT_DATA_PARAM_CHANGE:               return "Param Change";
    case AV_PKT_DATA_H263_MB_INFO:               return "H263 MB Info";
    case AV_PKT_DATA_REPLAYGAIN:                 return "Replay Gain";
    case AV_PKT_DATA_DISPLAYMATRIX:              return "Display Matrix";
    case AV_PKT_DATA_STEREO3D:                   return "Stereo 3D";
    case AV_PKT_DATA_AUDIO_SERVICE_TYPE:         return "Audio Service Type";
    case AV_PKT_DATA_SKIP_SAMPLES:               return "Skip Samples";
    case AV_PKT_DATA_JP_DUALMONO:                return "JP Dual Mono";
    case AV_PKT_DATA_STRINGS_METADATA:           return "Strings Metadata";
    case AV_PKT_DATA_SUBTITLE_POSITION:          return "Subtitle Position";
    case AV_PKT_DATA_MATROSKA_BLOCKADDITIONAL:   return "Matroska BlockAdditional";
    case AV_PKT_DATA_WEBVTT_IDENTIFIER:          return "WebVTT ID";
    case AV_PKT_DATA_WEBVTT_SETTINGS:            return "WebVTT Settings";
    case AV_PKT_DATA_METADATA_UPDATE:            return "Metadata Update";
    case AV_PKT_DATA_MPEGTS_STREAM_ID:           return "MPEGTS Stream ID";
    case AV_PKT_DATA_MASTERING_DISPLAY_METADATA: return "Mastering display metadata";
    }
    /* Unknown / unnamed type. */
    return NULL;
}
+
/* Sentinel appended to a merged packet so av_packet_split_side_data()
 * can recognize and undo the merge. */
#define FF_MERGE_MARKER 0x8c4d9d108e25e9feULL

/**
 * Serialize all side data into the packet payload itself.
 * Layout after the original payload: for each element (written from the
 * last index down to 0) its data bytes, a big-endian 32-bit size, and a
 * type byte whose high bit marks the first-written (highest-index)
 * element; finally the 64-bit FF_MERGE_MARKER.
 * Returns 1 if a merge happened, 0 if there was no side data, negative
 * AVERROR on failure.
 */
int av_packet_merge_side_data(AVPacket *pkt){
    if(pkt->side_data_elems){
        AVBufferRef *buf;
        int i;
        uint8_t *p;
        /* payload + 8-byte marker + padding, plus per-element overhead below */
        uint64_t size= pkt->size + 8LL + AV_INPUT_BUFFER_PADDING_SIZE;
        AVPacket old= *pkt;
        for (i=0; i<old.side_data_elems; i++) {
            size += old.side_data[i].size + 5LL; /* data + be32 size + type byte */
        }
        if (size > INT_MAX)
            return AVERROR(EINVAL);
        buf = av_buffer_alloc(size);
        if (!buf)
            return AVERROR(ENOMEM);
        pkt->buf = buf;
        pkt->data = p = buf->data;
        pkt->size = size - AV_INPUT_BUFFER_PADDING_SIZE;
        bytestream_put_buffer(&p, old.data, old.size);
        for (i=old.side_data_elems-1; i>=0; i--) {
            bytestream_put_buffer(&p, old.side_data[i].data, old.side_data[i].size);
            bytestream_put_be32(&p, old.side_data[i].size);
            *p++ = old.side_data[i].type | ((i==old.side_data_elems-1)*128);
        }
        bytestream_put_be64(&p, FF_MERGE_MARKER);
        av_assert0(p-pkt->data == pkt->size);
        memset(p, 0, AV_INPUT_BUFFER_PADDING_SIZE);
        av_packet_unref(&old); /* drops the old buffer and side-data arrays */
        pkt->side_data_elems = 0;
        pkt->side_data = NULL;
        return 1;
    }
    return 0;
}
+
/**
 * Undo av_packet_merge_side_data(): if the payload ends with
 * FF_MERGE_MARKER, walk the serialized elements backwards, validate them,
 * and re-create the side-data array, shrinking pkt->size back to the
 * original payload. Returns 1 if a split happened, 0 otherwise,
 * AVERROR(ENOMEM) on allocation failure.
 *
 * NOTE(review): on an allocation failure inside the second loop, entries
 * allocated so far and the partially reduced pkt->size are not rolled
 * back — the packet is left inconsistent; confirm callers treat a
 * negative return as fatal for the packet.
 */
int av_packet_split_side_data(AVPacket *pkt){
    if (!pkt->side_data_elems && pkt->size >12 && AV_RB64(pkt->data + pkt->size - 8) == FF_MERGE_MARKER){
        int i;
        unsigned int size;
        uint8_t *p;

        /* First pass: count elements and validate sizes without mutating pkt. */
        p = pkt->data + pkt->size - 8 - 5;
        for (i=1; ; i++){
            size = AV_RB32(p);
            if (size>INT_MAX - 5 || p - pkt->data < size)
                return 0;
            if (p[4]&128) /* high bit of the type byte marks the last element */
                break;
            if (p - pkt->data < size + 5)
                return 0;
            p-= size+5;
        }

        pkt->side_data = av_malloc_array(i, sizeof(*pkt->side_data));
        if (!pkt->side_data)
            return AVERROR(ENOMEM);

        /* Second pass: copy each element out and shrink the payload. */
        p= pkt->data + pkt->size - 8 - 5;
        for (i=0; ; i++){
            size= AV_RB32(p);
            av_assert0(size<=INT_MAX - 5 && p - pkt->data >= size);
            pkt->side_data[i].data = av_mallocz(size + AV_INPUT_BUFFER_PADDING_SIZE);
            pkt->side_data[i].size = size;
            pkt->side_data[i].type = p[4]&127;
            if (!pkt->side_data[i].data)
                return AVERROR(ENOMEM);
            memcpy(pkt->side_data[i].data, p-size, size);
            pkt->size -= size + 5;
            if(p[4]&128)
                break;
            p-= size+5;
        }
        pkt->size -= 8; /* drop the trailing marker */
        pkt->side_data_elems = i+1;
        return 1;
    }
    return 0;
}
+
/**
 * Serialize a dictionary as concatenated "key\0value\0" pairs.
 * *size receives the total byte count (0 and NULL returned for an empty
 * or NULL dict, or on allocation failure / int overflow).
 * The returned buffer is owned by the caller (av_free()).
 */
uint8_t *av_packet_pack_dictionary(AVDictionary *dict, int *size)
{
    AVDictionaryEntry *t = NULL;
    uint8_t *data = NULL;
    *size = 0;

    if (!dict)
        return NULL;

    while ((t = av_dict_get(dict, "", t, AV_DICT_IGNORE_SUFFIX))) {
        const size_t keylen   = strlen(t->key);
        const size_t valuelen = strlen(t->value);
        /* +1 twice: both strings keep their NUL terminators. */
        const size_t new_size = *size + keylen + 1 + valuelen + 1;
        uint8_t *const new_data = av_realloc(data, new_size);

        if (!new_data)
            goto fail;
        data = new_data;
        /* *size is an int: bail out before it can overflow. */
        if (new_size > INT_MAX)
            goto fail;

        memcpy(data + *size, t->key, keylen + 1);
        memcpy(data + *size + keylen + 1, t->value, valuelen + 1);

        *size = new_size;
    }

    return data;

fail:
    av_freep(&data);
    *size = 0;
    return NULL;
}
+
+int av_packet_unpack_dictionary(const uint8_t *data, int size, AVDictionary **dict)
+{
+ const uint8_t *end = data + size;
+ int ret = 0;
+
+ if (!dict || !data || !size)
+ return ret;
+ if (size && end[-1])
+ return AVERROR_INVALIDDATA;
+ while (data < end) {
+ const uint8_t *key = data;
+ const uint8_t *val = data + strlen(key) + 1;
+
+ if (val >= end)
+ return AVERROR_INVALIDDATA;
+
+ ret = av_dict_set(dict, key, val, 0);
+ if (ret < 0)
+ break;
+ data = val + strlen(val) + 1;
+ }
+
+ return ret;
+}
+
+int av_packet_shrink_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
+ int size)
+{
+ int i;
+
+ for (i = 0; i < pkt->side_data_elems; i++) {
+ if (pkt->side_data[i].type == type) {
+ if (size > pkt->side_data[i].size)
+ return AVERROR(ENOMEM);
+ pkt->side_data[i].size = size;
+ return 0;
+ }
+ }
+ return AVERROR(ENOENT);
+}
+
/**
 * Copy all packet properties (timestamps, flags, stream index) and
 * deep-copy every side-data element from src to dst.
 * The payload (buf/data/size) is NOT copied.
 * On failure dst's side data is freed and AVERROR(ENOMEM) returned.
 */
int av_packet_copy_props(AVPacket *dst, const AVPacket *src)
{
    int i;

    dst->pts                  = src->pts;
    dst->dts                  = src->dts;
    dst->pos                  = src->pos;
    dst->duration             = src->duration;
#if FF_API_CONVERGENCE_DURATION
FF_DISABLE_DEPRECATION_WARNINGS
    dst->convergence_duration = src->convergence_duration;
FF_ENABLE_DEPRECATION_WARNINGS
#endif
    dst->flags                = src->flags;
    dst->stream_index         = src->stream_index;

    for (i = 0; i < src->side_data_elems; i++) {
         enum AVPacketSideDataType type = src->side_data[i].type;
         int size          = src->side_data[i].size;
         uint8_t *src_data = src->side_data[i].data;
         /* Allocates a fresh, padded element attached to dst. */
         uint8_t *dst_data = av_packet_new_side_data(dst, type, size);

        if (!dst_data) {
            av_packet_free_side_data(dst);
            return AVERROR(ENOMEM);
        }
        memcpy(dst_data, src_data, size);
    }

    return 0;
}
+
/**
 * Wipe the packet: free its side data, unreference the payload buffer and
 * reset every field (including data/size) to defaults.
 */
void av_packet_unref(AVPacket *pkt)
{
    av_packet_free_side_data(pkt);
    av_buffer_unref(&pkt->buf);
    av_init_packet(pkt);
    /* av_init_packet() does not touch data/size; clear them explicitly. */
    pkt->data = NULL;
    pkt->size = 0;
}
+
/**
 * Set up dst as a reference to the same data as src.
 * Properties and side data are deep-copied; the payload is shared via the
 * refcounted buffer when src owns one, otherwise copied into a new buffer.
 * On failure dst is cleaned up and a negative AVERROR is returned.
 */
int av_packet_ref(AVPacket *dst, const AVPacket *src)
{
    int ret;

    ret = av_packet_copy_props(dst, src);
    if (ret < 0)
        return ret;

    if (!src->buf) {
        /* src does not own its payload: make dst self-contained. */
        ret = packet_alloc(&dst->buf, src->size);
        if (ret < 0)
            goto fail;
        memcpy(dst->buf->data, src->data, src->size);

        dst->data = dst->buf->data;
    } else {
        /* Share the refcounted buffer; keep src's (possibly offset) data
         * pointer into it. */
        dst->buf = av_buffer_ref(src->buf);
        if (!dst->buf) {
            ret = AVERROR(ENOMEM);
            goto fail;
        }
        dst->data = src->data;
    }

    dst->size = src->size;

    return 0;
fail:
    /* Undo the side-data copies made by av_packet_copy_props(). */
    av_packet_free_side_data(dst);
    return ret;
}
+
+AVPacket *av_packet_clone(AVPacket *src)
+{
+ AVPacket *ret = av_packet_alloc();
+
+ if (!ret)
+ return ret;
+
+ if (av_packet_ref(ret, src))
+ av_packet_free(&ret);
+
+ return ret;
+}
+
/**
 * Move every field of src into dst and reset src to a blank state,
 * transferring ownership without touching reference counts.
 * dst's previous contents are overwritten without being freed.
 */
void av_packet_move_ref(AVPacket *dst, AVPacket *src)
{
    *dst = *src;
    av_init_packet(src);
    /* av_init_packet() leaves data/size alone; blank them explicitly. */
    src->data = NULL;
    src->size = 0;
}
+
/**
 * Convert all valid timing fields of pkt (pts, dts, duration and the
 * deprecated convergence_duration) from src_tb to dst_tb.
 * Unknown timestamps (AV_NOPTS_VALUE) and non-positive durations are
 * left untouched.
 */
void av_packet_rescale_ts(AVPacket *pkt, AVRational src_tb, AVRational dst_tb)
{
    if (pkt->pts != AV_NOPTS_VALUE)
        pkt->pts = av_rescale_q(pkt->pts, src_tb, dst_tb);
    if (pkt->dts != AV_NOPTS_VALUE)
        pkt->dts = av_rescale_q(pkt->dts, src_tb, dst_tb);
    if (pkt->duration > 0)
        pkt->duration = av_rescale_q(pkt->duration, src_tb, dst_tb);
#if FF_API_CONVERGENCE_DURATION
FF_DISABLE_DEPRECATION_WARNINGS
    if (pkt->convergence_duration > 0)
        pkt->convergence_duration = av_rescale_q(pkt->convergence_duration, src_tb, dst_tb);
FF_ENABLE_DEPRECATION_WARNINGS
#endif
}
+
/**
 * Store encoder quality statistics in AV_PKT_DATA_QUALITY_STATS side data,
 * reusing an existing element when present.
 * Layout: bytes 0-3 little-endian quality, byte 4 picture type, byte 5
 * error count, bytes 6-7 unused, then one little-endian 64-bit error
 * value per plane starting at byte 8.
 * Returns 0 on success, AVERROR(ENOMEM) if no sufficiently large element
 * could be obtained.
 */
int ff_side_data_set_encoder_stats(AVPacket *pkt, int quality, int64_t *error, int error_count, int pict_type)
{
    uint8_t *side_data;
    int side_data_size;
    int i;

    side_data = av_packet_get_side_data(pkt, AV_PKT_DATA_QUALITY_STATS, &side_data_size);
    if (!side_data) {
        side_data_size = 4+4+8*error_count; /* header + per-plane errors */
        side_data = av_packet_new_side_data(pkt, AV_PKT_DATA_QUALITY_STATS,
                                            side_data_size);
    }

    /* An existing element may be too small for this error_count. */
    if (!side_data || side_data_size < 4+4+8*error_count)
        return AVERROR(ENOMEM);

    AV_WL32(side_data, quality);
    side_data[4] = pict_type;
    side_data[5] = error_count;
    for (i = 0; i<error_count; i++)
        AV_WL64(side_data+8 + 8*i , error[i]);

    return 0;
}
diff --git a/media/ffvpx/libavcodec/avpicture.c b/media/ffvpx/libavcodec/avpicture.c
new file mode 100644
index 000000000..56435f4fc
--- /dev/null
+++ b/media/ffvpx/libavcodec/avpicture.c
@@ -0,0 +1,82 @@
+/*
+ * AVPicture management routines
+ * Copyright (c) 2001, 2002, 2003 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * AVPicture management routines
+ */
+
+#include "avcodec.h"
+#include "internal.h"
+#include "libavutil/common.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/internal.h"
+#include "libavutil/colorspace.h"
+
+#if FF_API_AVPICTURE
+FF_DISABLE_DEPRECATION_WARNINGS
+int avpicture_fill(AVPicture *picture, const uint8_t *ptr,
+ enum AVPixelFormat pix_fmt, int width, int height)
+{
+ return av_image_fill_arrays(picture->data, picture->linesize,
+ ptr, pix_fmt, width, height, 1);
+}
+
+int avpicture_layout(const AVPicture* src, enum AVPixelFormat pix_fmt, int width, int height,
+ unsigned char *dest, int dest_size)
+{
+ return av_image_copy_to_buffer(dest, dest_size,
+ (const uint8_t * const*)src->data, src->linesize,
+ pix_fmt, width, height, 1);
+}
+
+int avpicture_get_size(enum AVPixelFormat pix_fmt, int width, int height)
+{
+ return av_image_get_buffer_size(pix_fmt, width, height, 1);
+}
+
+int avpicture_alloc(AVPicture *picture,
+ enum AVPixelFormat pix_fmt, int width, int height)
+{
+ int ret = av_image_alloc(picture->data, picture->linesize,
+ width, height, pix_fmt, 1);
+ if (ret < 0) {
+ memset(picture, 0, sizeof(AVPicture));
+ return ret;
+ }
+
+ return 0;
+}
+
+void avpicture_free(AVPicture *picture)
+{
+ av_freep(&picture->data[0]);
+}
+
+void av_picture_copy(AVPicture *dst, const AVPicture *src,
+ enum AVPixelFormat pix_fmt, int width, int height)
+{
+ av_image_copy(dst->data, dst->linesize, (const uint8_t **)src->data,
+ src->linesize, pix_fmt, width, height);
+}
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif /* FF_API_AVPICTURE */
diff --git a/media/ffvpx/libavcodec/bit_depth_template.c b/media/ffvpx/libavcodec/bit_depth_template.c
new file mode 100644
index 000000000..80184892f
--- /dev/null
+++ b/media/ffvpx/libavcodec/bit_depth_template.c
@@ -0,0 +1,93 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "mathops.h"
+#include "rnd_avg.h"
+#include "libavutil/intreadwrite.h"
+
+#ifndef BIT_DEPTH
+#define BIT_DEPTH 8
+#endif
+
+#ifdef AVCODEC_BIT_DEPTH_TEMPLATE_C
+# undef pixel
+# undef pixel2
+# undef pixel4
+# undef dctcoef
+# undef INIT_CLIP
+# undef no_rnd_avg_pixel4
+# undef rnd_avg_pixel4
+# undef AV_RN2P
+# undef AV_RN4P
+# undef AV_RN4PA
+# undef AV_WN2P
+# undef AV_WN4P
+# undef AV_WN4PA
+# undef CLIP
+# undef FUNC
+# undef FUNCC
+# undef av_clip_pixel
+# undef PIXEL_SPLAT_X4
+#else
+# define AVCODEC_BIT_DEPTH_TEMPLATE_C
+#endif
+
+#if BIT_DEPTH > 8
+# define pixel uint16_t
+# define pixel2 uint32_t
+# define pixel4 uint64_t
+# define dctcoef int32_t
+
+# define INIT_CLIP
+# define no_rnd_avg_pixel4 no_rnd_avg64
+# define rnd_avg_pixel4 rnd_avg64
+# define AV_RN2P AV_RN32
+# define AV_RN4P AV_RN64
+# define AV_RN4PA AV_RN64A
+# define AV_WN2P AV_WN32
+# define AV_WN4P AV_WN64
+# define AV_WN4PA AV_WN64A
+# define PIXEL_SPLAT_X4(x) ((x)*0x0001000100010001ULL)
+
+# define av_clip_pixel(a) av_clip_uintp2(a, BIT_DEPTH)
+# define CLIP(a) av_clip_uintp2(a, BIT_DEPTH)
+#else
+# define pixel uint8_t
+# define pixel2 uint16_t
+# define pixel4 uint32_t
+# define dctcoef int16_t
+
+# define INIT_CLIP
+# define no_rnd_avg_pixel4 no_rnd_avg32
+# define rnd_avg_pixel4 rnd_avg32
+# define AV_RN2P AV_RN16
+# define AV_RN4P AV_RN32
+# define AV_RN4PA AV_RN32A
+# define AV_WN2P AV_WN16
+# define AV_WN4P AV_WN32
+# define AV_WN4PA AV_WN32A
+# define PIXEL_SPLAT_X4(x) ((x)*0x01010101U)
+
+# define av_clip_pixel(a) av_clip_uint8(a)
+# define CLIP(a) av_clip_uint8(a)
+#endif
+
+#define FUNC3(a, b, c) a ## _ ## b ## c
+#define FUNC2(a, b, c) FUNC3(a, b, c)
+#define FUNC(a) FUNC2(a, BIT_DEPTH,)
+#define FUNCC(a) FUNC2(a, BIT_DEPTH, _c)
diff --git a/media/ffvpx/libavcodec/bitstream.c b/media/ffvpx/libavcodec/bitstream.c
new file mode 100644
index 000000000..562ca1cb8
--- /dev/null
+++ b/media/ffvpx/libavcodec/bitstream.c
@@ -0,0 +1,363 @@
+/*
+ * Common bit i/o utils
+ * Copyright (c) 2000, 2001 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2010 Loren Merritt
+ *
+ * alternative bitstream reader & writer by Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * bitstream api.
+ */
+
+#include "libavutil/atomic.h"
+#include "libavutil/avassert.h"
+#include "libavutil/qsort.h"
+#include "avcodec.h"
+#include "internal.h"
+#include "mathops.h"
+#include "put_bits.h"
+#include "vlc.h"
+
+const uint8_t ff_log2_run[41]={
+ 0, 0, 0, 0, 1, 1, 1, 1,
+ 2, 2, 2, 2, 3, 3, 3, 3,
+ 4, 4, 5, 5, 6, 6, 7, 7,
+ 8, 9,10,11,12,13,14,15,
+16,17,18,19,20,21,22,23,
+24,
+};
+
+void avpriv_align_put_bits(PutBitContext *s)
+{
+ put_bits(s, s->bit_left & 7, 0);
+}
+
+void avpriv_put_string(PutBitContext *pb, const char *string,
+ int terminate_string)
+{
+ while (*string) {
+ put_bits(pb, 8, *string);
+ string++;
+ }
+ if (terminate_string)
+ put_bits(pb, 8, 0);
+}
+
+void avpriv_copy_bits(PutBitContext *pb, const uint8_t *src, int length)
+{
+ int words = length >> 4;
+ int bits = length & 15;
+ int i;
+
+ if (length == 0)
+ return;
+
+ av_assert0(length <= put_bits_left(pb));
+
+ if (CONFIG_SMALL || words < 16 || put_bits_count(pb) & 7) {
+ for (i = 0; i < words; i++)
+ put_bits(pb, 16, AV_RB16(src + 2 * i));
+ } else {
+ for (i = 0; put_bits_count(pb) & 31; i++)
+ put_bits(pb, 8, src[i]);
+ flush_put_bits(pb);
+ memcpy(put_bits_ptr(pb), src + i, 2 * words - i);
+ skip_put_bytes(pb, 2 * words - i);
+ }
+
+ put_bits(pb, bits, AV_RB16(src + 2 * words) >> (16 - bits));
+}
+
+/* VLC decoding */
+
+#define GET_DATA(v, table, i, wrap, size) \
+{ \
+ const uint8_t *ptr = (const uint8_t *)table + i * wrap; \
+ switch(size) { \
+ case 1: \
+ v = *(const uint8_t *)ptr; \
+ break; \
+ case 2: \
+ v = *(const uint16_t *)ptr; \
+ break; \
+ default: \
+ v = *(const uint32_t *)ptr; \
+ break; \
+ } \
+}
+
+
+static int alloc_table(VLC *vlc, int size, int use_static)
+{
+ int index = vlc->table_size;
+
+ vlc->table_size += size;
+ if (vlc->table_size > vlc->table_allocated) {
+ if (use_static)
+ abort(); // cannot do anything, init_vlc() is used with too little memory
+ vlc->table_allocated += (1 << vlc->bits);
+ vlc->table = av_realloc_f(vlc->table, vlc->table_allocated, sizeof(VLC_TYPE) * 2);
+ if (!vlc->table) {
+ vlc->table_allocated = 0;
+ vlc->table_size = 0;
+ return AVERROR(ENOMEM);
+ }
+ memset(vlc->table + vlc->table_allocated - (1 << vlc->bits), 0, sizeof(VLC_TYPE) * 2 << vlc->bits);
+ }
+ return index;
+}
+
+static av_always_inline uint32_t bitswap_32(uint32_t x)
+{
+ return (uint32_t)ff_reverse[ x & 0xFF] << 24 |
+ (uint32_t)ff_reverse[(x >> 8) & 0xFF] << 16 |
+ (uint32_t)ff_reverse[(x >> 16) & 0xFF] << 8 |
+ (uint32_t)ff_reverse[ x >> 24];
+}
+
+typedef struct VLCcode {
+ uint8_t bits;
+ uint16_t symbol;
+ /** codeword, with the first bit-to-be-read in the msb
+ * (even if intended for a little-endian bitstream reader) */
+ uint32_t code;
+} VLCcode;
+
+static int compare_vlcspec(const void *a, const void *b)
+{
+ const VLCcode *sa = a, *sb = b;
+ return (sa->code >> 1) - (sb->code >> 1);
+}
+/**
+ * Build VLC decoding tables suitable for use with get_vlc().
+ *
+ * @param vlc the context to be initialized
+ *
+ * @param table_nb_bits max length of vlc codes to store directly in this table
+ * (Longer codes are delegated to subtables.)
+ *
+ * @param nb_codes number of elements in codes[]
+ *
+ * @param codes descriptions of the vlc codes
+ * These must be ordered such that codes going into the same subtable are contiguous.
+ * Sorting by VLCcode.code is sufficient, though not necessary.
+ */
+static int build_table(VLC *vlc, int table_nb_bits, int nb_codes,
+ VLCcode *codes, int flags)
+{
+ int table_size, table_index, index, code_prefix, symbol, subtable_bits;
+ int i, j, k, n, nb, inc;
+ uint32_t code;
+ volatile VLC_TYPE (* volatile table)[2]; // the double volatile is needed to prevent an internal compiler error in gcc 4.2
+
+ table_size = 1 << table_nb_bits;
+ if (table_nb_bits > 30)
+ return -1;
+ table_index = alloc_table(vlc, table_size, flags & INIT_VLC_USE_NEW_STATIC);
+ ff_dlog(NULL, "new table index=%d size=%d\n", table_index, table_size);
+ if (table_index < 0)
+ return table_index;
+ table = (volatile VLC_TYPE (*)[2])&vlc->table[table_index];
+
+ /* first pass: map codes and compute auxiliary table sizes */
+ for (i = 0; i < nb_codes; i++) {
+ n = codes[i].bits;
+ code = codes[i].code;
+ symbol = codes[i].symbol;
+ ff_dlog(NULL, "i=%d n=%d code=0x%x\n", i, n, code);
+ if (n <= table_nb_bits) {
+ /* no need to add another table */
+ j = code >> (32 - table_nb_bits);
+ nb = 1 << (table_nb_bits - n);
+ inc = 1;
+ if (flags & INIT_VLC_LE) {
+ j = bitswap_32(code);
+ inc = 1 << n;
+ }
+ for (k = 0; k < nb; k++) {
+ int bits = table[j][1];
+ ff_dlog(NULL, "%4x: code=%d n=%d\n", j, i, n);
+ if (bits != 0 && bits != n) {
+ av_log(NULL, AV_LOG_ERROR, "incorrect codes\n");
+ return AVERROR_INVALIDDATA;
+ }
+ table[j][1] = n; //bits
+ table[j][0] = symbol;
+ j += inc;
+ }
+ } else {
+ /* fill auxiliary table recursively */
+ n -= table_nb_bits;
+ code_prefix = code >> (32 - table_nb_bits);
+ subtable_bits = n;
+ codes[i].bits = n;
+ codes[i].code = code << table_nb_bits;
+ for (k = i+1; k < nb_codes; k++) {
+ n = codes[k].bits - table_nb_bits;
+ if (n <= 0)
+ break;
+ code = codes[k].code;
+ if (code >> (32 - table_nb_bits) != code_prefix)
+ break;
+ codes[k].bits = n;
+ codes[k].code = code << table_nb_bits;
+ subtable_bits = FFMAX(subtable_bits, n);
+ }
+ subtable_bits = FFMIN(subtable_bits, table_nb_bits);
+ j = (flags & INIT_VLC_LE) ? bitswap_32(code_prefix) >> (32 - table_nb_bits) : code_prefix;
+ table[j][1] = -subtable_bits;
+ ff_dlog(NULL, "%4x: n=%d (subtable)\n",
+ j, codes[i].bits + table_nb_bits);
+ index = build_table(vlc, subtable_bits, k-i, codes+i, flags);
+ if (index < 0)
+ return index;
+ /* note: realloc has been done, so reload tables */
+ table = (volatile VLC_TYPE (*)[2])&vlc->table[table_index];
+ table[j][0] = index; //code
+ i = k-1;
+ }
+ }
+
+ for (i = 0; i < table_size; i++) {
+ if (table[i][1] == 0) //bits
+ table[i][0] = -1; //codes
+ }
+
+ return table_index;
+}
+
+
+/* Build VLC decoding tables suitable for use with get_vlc().
+
+ 'nb_bits' sets the decoding table size (2^nb_bits) entries. The
+ bigger it is, the faster is the decoding. But it should not be too
+ big to save memory and L1 cache. '9' is a good compromise.
+
+ 'nb_codes' : number of vlcs codes
+
+ 'bits' : table which gives the size (in bits) of each vlc code.
+
+   'codes' : table which gives the bit pattern of each vlc code.
+
+ 'symbols' : table which gives the values to be returned from get_vlc().
+
+ 'xxx_wrap' : give the number of bytes between each entry of the
+ 'bits' or 'codes' tables.
+
+ 'xxx_size' : gives the number of bytes of each entry of the 'bits'
+ or 'codes' tables.
+
+ 'wrap' and 'size' make it possible to use any memory configuration and types
+ (byte/word/long) to store the 'bits', 'codes', and 'symbols' tables.
+
+ 'use_static' should be set to 1 for tables, which should be freed
+ with av_free_static(), 0 if ff_free_vlc() will be used.
+*/
+int ff_init_vlc_sparse(VLC *vlc_arg, int nb_bits, int nb_codes,
+ const void *bits, int bits_wrap, int bits_size,
+ const void *codes, int codes_wrap, int codes_size,
+ const void *symbols, int symbols_wrap, int symbols_size,
+ int flags)
+{
+ VLCcode *buf;
+ int i, j, ret;
+ VLCcode localbuf[1500]; // the maximum currently needed is 1296 by rv34
+ VLC localvlc, *vlc;
+
+ vlc = vlc_arg;
+ vlc->bits = nb_bits;
+ if (flags & INIT_VLC_USE_NEW_STATIC) {
+ av_assert0(nb_codes + 1 <= FF_ARRAY_ELEMS(localbuf));
+ buf = localbuf;
+ localvlc = *vlc_arg;
+ vlc = &localvlc;
+ vlc->table_size = 0;
+ } else {
+ vlc->table = NULL;
+ vlc->table_allocated = 0;
+ vlc->table_size = 0;
+
+ buf = av_malloc_array((nb_codes + 1), sizeof(VLCcode));
+ if (!buf)
+ return AVERROR(ENOMEM);
+ }
+
+
+ av_assert0(symbols_size <= 2 || !symbols);
+ j = 0;
+#define COPY(condition)\
+ for (i = 0; i < nb_codes; i++) { \
+ GET_DATA(buf[j].bits, bits, i, bits_wrap, bits_size); \
+ if (!(condition)) \
+ continue; \
+ if (buf[j].bits > 3*nb_bits || buf[j].bits>32) { \
+ av_log(NULL, AV_LOG_ERROR, "Too long VLC (%d) in init_vlc\n", buf[j].bits);\
+ if (!(flags & INIT_VLC_USE_NEW_STATIC)) \
+ av_free(buf); \
+ return -1; \
+ } \
+ GET_DATA(buf[j].code, codes, i, codes_wrap, codes_size); \
+ if (buf[j].code >= (1LL<<buf[j].bits)) { \
+ av_log(NULL, AV_LOG_ERROR, "Invalid code %x for %d in init_vlc\n", buf[j].code, i);\
+ if (!(flags & INIT_VLC_USE_NEW_STATIC)) \
+ av_free(buf); \
+ return -1; \
+ } \
+ if (flags & INIT_VLC_LE) \
+ buf[j].code = bitswap_32(buf[j].code); \
+ else \
+ buf[j].code <<= 32 - buf[j].bits; \
+ if (symbols) \
+ GET_DATA(buf[j].symbol, symbols, i, symbols_wrap, symbols_size) \
+ else \
+ buf[j].symbol = i; \
+ j++; \
+ }
+ COPY(buf[j].bits > nb_bits);
+ // qsort is the slowest part of init_vlc, and could probably be improved or avoided
+ AV_QSORT(buf, j, struct VLCcode, compare_vlcspec);
+ COPY(buf[j].bits && buf[j].bits <= nb_bits);
+ nb_codes = j;
+
+ ret = build_table(vlc, nb_bits, nb_codes, buf, flags);
+
+ if (flags & INIT_VLC_USE_NEW_STATIC) {
+ if(vlc->table_size != vlc->table_allocated)
+ av_log(NULL, AV_LOG_ERROR, "needed %d had %d\n", vlc->table_size, vlc->table_allocated);
+
+ av_assert0(ret >= 0);
+ *vlc_arg = *vlc;
+ } else {
+ av_free(buf);
+ if (ret < 0) {
+ av_freep(&vlc->table);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+
+void ff_free_vlc(VLC *vlc)
+{
+ av_freep(&vlc->table);
+}
diff --git a/media/ffvpx/libavcodec/blockdsp.h b/media/ffvpx/libavcodec/blockdsp.h
new file mode 100644
index 000000000..95e1d0f32
--- /dev/null
+++ b/media/ffvpx/libavcodec/blockdsp.h
@@ -0,0 +1,49 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_BLOCKDSP_H
+#define AVCODEC_BLOCKDSP_H
+
+#include <stdint.h>
+
+#include "avcodec.h"
+#include "version.h"
+
+/* add and put pixel (decoding)
+ * Block sizes for op_pixels_func are 8x4,8x8 16x8 16x16.
+ * h for op_pixels_func is limited to { width / 2, width },
+ * but never larger than 16 and never smaller than 4. */
+typedef void (*op_fill_func)(uint8_t *block /* align width (8 or 16) */,
+ uint8_t value, int line_size, int h);
+
+typedef struct BlockDSPContext {
+ void (*clear_block)(int16_t *block /* align 16 */);
+ void (*clear_blocks)(int16_t *blocks /* align 16 */);
+
+ op_fill_func fill_block_tab[2];
+} BlockDSPContext;
+
+void ff_blockdsp_init(BlockDSPContext *c, AVCodecContext *avctx);
+
+void ff_blockdsp_init_alpha(BlockDSPContext *c);
+void ff_blockdsp_init_arm(BlockDSPContext *c);
+void ff_blockdsp_init_ppc(BlockDSPContext *c);
+void ff_blockdsp_init_x86(BlockDSPContext *c, AVCodecContext *avctx);
+void ff_blockdsp_init_mips(BlockDSPContext *c);
+
+#endif /* AVCODEC_BLOCKDSP_H */
diff --git a/media/ffvpx/libavcodec/bsf.h b/media/ffvpx/libavcodec/bsf.h
new file mode 100644
index 000000000..3435df5d8
--- /dev/null
+++ b/media/ffvpx/libavcodec/bsf.h
@@ -0,0 +1,33 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_BSF_H
+#define AVCODEC_BSF_H
+
+#include "avcodec.h"
+
+/**
+ * Called by the bitstream filters to get the next packet for filtering.
+ * The filter is responsible for either freeing the packet or passing it to the
+ * caller.
+ */
+int ff_bsf_get_packet(AVBSFContext *ctx, AVPacket **pkt);
+
+const AVClass *ff_bsf_child_class_next(const AVClass *prev);
+
+#endif /* AVCODEC_BSF_H */
diff --git a/media/ffvpx/libavcodec/bytestream.h b/media/ffvpx/libavcodec/bytestream.h
new file mode 100644
index 000000000..7c05ea6cf
--- /dev/null
+++ b/media/ffvpx/libavcodec/bytestream.h
@@ -0,0 +1,376 @@
+/*
+ * Bytestream functions
+ * copyright (c) 2006 Baptiste Coudurier <baptiste.coudurier@free.fr>
+ * Copyright (c) 2012 Aneesh Dogra (lionaneesh) <lionaneesh@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_BYTESTREAM_H
+#define AVCODEC_BYTESTREAM_H
+
+#include <stdint.h>
+#include <string.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+
+typedef struct GetByteContext {
+ const uint8_t *buffer, *buffer_end, *buffer_start;
+} GetByteContext;
+
+typedef struct PutByteContext {
+ uint8_t *buffer, *buffer_end, *buffer_start;
+ int eof;
+} PutByteContext;
+
+#define DEF(type, name, bytes, read, write) \
+static av_always_inline type bytestream_get_ ## name(const uint8_t **b) \
+{ \
+ (*b) += bytes; \
+ return read(*b - bytes); \
+} \
+static av_always_inline void bytestream_put_ ## name(uint8_t **b, \
+ const type value) \
+{ \
+ write(*b, value); \
+ (*b) += bytes; \
+} \
+static av_always_inline void bytestream2_put_ ## name ## u(PutByteContext *p, \
+ const type value) \
+{ \
+ bytestream_put_ ## name(&p->buffer, value); \
+} \
+static av_always_inline void bytestream2_put_ ## name(PutByteContext *p, \
+ const type value) \
+{ \
+ if (!p->eof && (p->buffer_end - p->buffer >= bytes)) { \
+ write(p->buffer, value); \
+ p->buffer += bytes; \
+ } else \
+ p->eof = 1; \
+} \
+static av_always_inline type bytestream2_get_ ## name ## u(GetByteContext *g) \
+{ \
+ return bytestream_get_ ## name(&g->buffer); \
+} \
+static av_always_inline type bytestream2_get_ ## name(GetByteContext *g) \
+{ \
+ if (g->buffer_end - g->buffer < bytes) { \
+ g->buffer = g->buffer_end; \
+ return 0; \
+ } \
+ return bytestream2_get_ ## name ## u(g); \
+} \
+static av_always_inline type bytestream2_peek_ ## name(GetByteContext *g) \
+{ \
+ if (g->buffer_end - g->buffer < bytes) \
+ return 0; \
+ return read(g->buffer); \
+}
+
+DEF(uint64_t, le64, 8, AV_RL64, AV_WL64)
+DEF(unsigned int, le32, 4, AV_RL32, AV_WL32)
+DEF(unsigned int, le24, 3, AV_RL24, AV_WL24)
+DEF(unsigned int, le16, 2, AV_RL16, AV_WL16)
+DEF(uint64_t, be64, 8, AV_RB64, AV_WB64)
+DEF(unsigned int, be32, 4, AV_RB32, AV_WB32)
+DEF(unsigned int, be24, 3, AV_RB24, AV_WB24)
+DEF(unsigned int, be16, 2, AV_RB16, AV_WB16)
+DEF(unsigned int, byte, 1, AV_RB8 , AV_WB8)
+
+#if HAVE_BIGENDIAN
+# define bytestream2_get_ne16 bytestream2_get_be16
+# define bytestream2_get_ne24 bytestream2_get_be24
+# define bytestream2_get_ne32 bytestream2_get_be32
+# define bytestream2_get_ne64 bytestream2_get_be64
+# define bytestream2_get_ne16u bytestream2_get_be16u
+# define bytestream2_get_ne24u bytestream2_get_be24u
+# define bytestream2_get_ne32u bytestream2_get_be32u
+# define bytestream2_get_ne64u bytestream2_get_be64u
+# define bytestream2_put_ne16 bytestream2_put_be16
+# define bytestream2_put_ne24 bytestream2_put_be24
+# define bytestream2_put_ne32 bytestream2_put_be32
+# define bytestream2_put_ne64 bytestream2_put_be64
+# define bytestream2_peek_ne16 bytestream2_peek_be16
+# define bytestream2_peek_ne24 bytestream2_peek_be24
+# define bytestream2_peek_ne32 bytestream2_peek_be32
+# define bytestream2_peek_ne64 bytestream2_peek_be64
+#else
+# define bytestream2_get_ne16 bytestream2_get_le16
+# define bytestream2_get_ne24 bytestream2_get_le24
+# define bytestream2_get_ne32 bytestream2_get_le32
+# define bytestream2_get_ne64 bytestream2_get_le64
+# define bytestream2_get_ne16u bytestream2_get_le16u
+# define bytestream2_get_ne24u bytestream2_get_le24u
+# define bytestream2_get_ne32u bytestream2_get_le32u
+# define bytestream2_get_ne64u bytestream2_get_le64u
+# define bytestream2_put_ne16 bytestream2_put_le16
+# define bytestream2_put_ne24 bytestream2_put_le24
+# define bytestream2_put_ne32 bytestream2_put_le32
+# define bytestream2_put_ne64 bytestream2_put_le64
+# define bytestream2_peek_ne16 bytestream2_peek_le16
+# define bytestream2_peek_ne24 bytestream2_peek_le24
+# define bytestream2_peek_ne32 bytestream2_peek_le32
+# define bytestream2_peek_ne64 bytestream2_peek_le64
+#endif
+
+static av_always_inline void bytestream2_init(GetByteContext *g,
+ const uint8_t *buf,
+ int buf_size)
+{
+ av_assert0(buf_size >= 0);
+ g->buffer = buf;
+ g->buffer_start = buf;
+ g->buffer_end = buf + buf_size;
+}
+
+static av_always_inline void bytestream2_init_writer(PutByteContext *p,
+ uint8_t *buf,
+ int buf_size)
+{
+ av_assert0(buf_size >= 0);
+ p->buffer = buf;
+ p->buffer_start = buf;
+ p->buffer_end = buf + buf_size;
+ p->eof = 0;
+}
+
+static av_always_inline unsigned int bytestream2_get_bytes_left(GetByteContext *g)
+{
+ return g->buffer_end - g->buffer;
+}
+
+static av_always_inline unsigned int bytestream2_get_bytes_left_p(PutByteContext *p)
+{
+ return p->buffer_end - p->buffer;
+}
+
+static av_always_inline void bytestream2_skip(GetByteContext *g,
+ unsigned int size)
+{
+ g->buffer += FFMIN(g->buffer_end - g->buffer, size);
+}
+
+static av_always_inline void bytestream2_skipu(GetByteContext *g,
+ unsigned int size)
+{
+ g->buffer += size;
+}
+
+static av_always_inline void bytestream2_skip_p(PutByteContext *p,
+ unsigned int size)
+{
+ int size2;
+ if (p->eof)
+ return;
+ size2 = FFMIN(p->buffer_end - p->buffer, size);
+ if (size2 != size)
+ p->eof = 1;
+ p->buffer += size2;
+}
+
+static av_always_inline int bytestream2_tell(GetByteContext *g)
+{
+ return (int)(g->buffer - g->buffer_start);
+}
+
+static av_always_inline int bytestream2_tell_p(PutByteContext *p)
+{
+ return (int)(p->buffer - p->buffer_start);
+}
+
+static av_always_inline int bytestream2_size(GetByteContext *g)
+{
+ return (int)(g->buffer_end - g->buffer_start);
+}
+
+static av_always_inline int bytestream2_size_p(PutByteContext *p)
+{
+ return (int)(p->buffer_end - p->buffer_start);
+}
+
+static av_always_inline int bytestream2_seek(GetByteContext *g,
+ int offset,
+ int whence)
+{
+ switch (whence) {
+ case SEEK_CUR:
+ offset = av_clip(offset, -(g->buffer - g->buffer_start),
+ g->buffer_end - g->buffer);
+ g->buffer += offset;
+ break;
+ case SEEK_END:
+ offset = av_clip(offset, -(g->buffer_end - g->buffer_start), 0);
+ g->buffer = g->buffer_end + offset;
+ break;
+ case SEEK_SET:
+ offset = av_clip(offset, 0, g->buffer_end - g->buffer_start);
+ g->buffer = g->buffer_start + offset;
+ break;
+ default:
+ return AVERROR(EINVAL);
+ }
+ return bytestream2_tell(g);
+}
+
+static av_always_inline int bytestream2_seek_p(PutByteContext *p,
+ int offset,
+ int whence)
+{
+ p->eof = 0;
+ switch (whence) {
+ case SEEK_CUR:
+ if (p->buffer_end - p->buffer < offset)
+ p->eof = 1;
+ offset = av_clip(offset, -(p->buffer - p->buffer_start),
+ p->buffer_end - p->buffer);
+ p->buffer += offset;
+ break;
+ case SEEK_END:
+ if (offset > 0)
+ p->eof = 1;
+ offset = av_clip(offset, -(p->buffer_end - p->buffer_start), 0);
+ p->buffer = p->buffer_end + offset;
+ break;
+ case SEEK_SET:
+ if (p->buffer_end - p->buffer_start < offset)
+ p->eof = 1;
+ offset = av_clip(offset, 0, p->buffer_end - p->buffer_start);
+ p->buffer = p->buffer_start + offset;
+ break;
+ default:
+ return AVERROR(EINVAL);
+ }
+ return bytestream2_tell_p(p);
+}
+
+static av_always_inline unsigned int bytestream2_get_buffer(GetByteContext *g,
+ uint8_t *dst,
+ unsigned int size)
+{
+ int size2 = FFMIN(g->buffer_end - g->buffer, size);
+ memcpy(dst, g->buffer, size2);
+ g->buffer += size2;
+ return size2;
+}
+
+static av_always_inline unsigned int bytestream2_get_bufferu(GetByteContext *g,
+ uint8_t *dst,
+ unsigned int size)
+{
+ memcpy(dst, g->buffer, size);
+ g->buffer += size;
+ return size;
+}
+
+static av_always_inline unsigned int bytestream2_put_buffer(PutByteContext *p,
+ const uint8_t *src,
+ unsigned int size)
+{
+ int size2;
+ if (p->eof)
+ return 0;
+ size2 = FFMIN(p->buffer_end - p->buffer, size);
+ if (size2 != size)
+ p->eof = 1;
+ memcpy(p->buffer, src, size2);
+ p->buffer += size2;
+ return size2;
+}
+
+static av_always_inline unsigned int bytestream2_put_bufferu(PutByteContext *p,
+ const uint8_t *src,
+ unsigned int size)
+{
+ memcpy(p->buffer, src, size);
+ p->buffer += size;
+ return size;
+}
+
+static av_always_inline void bytestream2_set_buffer(PutByteContext *p,
+ const uint8_t c,
+ unsigned int size)
+{
+ int size2;
+ if (p->eof)
+ return;
+ size2 = FFMIN(p->buffer_end - p->buffer, size);
+ if (size2 != size)
+ p->eof = 1;
+ memset(p->buffer, c, size2);
+ p->buffer += size2;
+}
+
+static av_always_inline void bytestream2_set_bufferu(PutByteContext *p,
+ const uint8_t c,
+ unsigned int size)
+{
+ memset(p->buffer, c, size);
+ p->buffer += size;
+}
+
+static av_always_inline unsigned int bytestream2_get_eof(PutByteContext *p)
+{
+ return p->eof;
+}
+
+static av_always_inline unsigned int bytestream2_copy_bufferu(PutByteContext *p,
+ GetByteContext *g,
+ unsigned int size)
+{
+ memcpy(p->buffer, g->buffer, size);
+ p->buffer += size;
+ g->buffer += size;
+ return size;
+}
+
+static av_always_inline unsigned int bytestream2_copy_buffer(PutByteContext *p,
+ GetByteContext *g,
+ unsigned int size)
+{
+ int size2;
+
+ if (p->eof)
+ return 0;
+ size = FFMIN(g->buffer_end - g->buffer, size);
+ size2 = FFMIN(p->buffer_end - p->buffer, size);
+ if (size2 != size)
+ p->eof = 1;
+
+ return bytestream2_copy_bufferu(p, g, size2);
+}
+
+static av_always_inline unsigned int bytestream_get_buffer(const uint8_t **b,
+ uint8_t *dst,
+ unsigned int size)
+{
+ memcpy(dst, *b, size);
+ (*b) += size;
+ return size;
+}
+
+static av_always_inline void bytestream_put_buffer(uint8_t **b,
+ const uint8_t *src,
+ unsigned int size)
+{
+ memcpy(*b, src, size);
+ (*b) += size;
+}
+
+#endif /* AVCODEC_BYTESTREAM_H */
diff --git a/media/ffvpx/libavcodec/codec_desc.c b/media/ffvpx/libavcodec/codec_desc.c
new file mode 100644
index 000000000..9d94b72ed
--- /dev/null
+++ b/media/ffvpx/libavcodec/codec_desc.c
@@ -0,0 +1,2989 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * This table was generated from the long and short names of AVCodecs
+ * please see the respective codec sources for authorship
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <string.h>
+
+#include "libavutil/common.h"
+#include "libavutil/internal.h"
+#include "avcodec.h"
+#include "profiles.h"
+#include "version.h"
+
+#define MT(...) (const char *const[]){ __VA_ARGS__, NULL }
+
+static const AVCodecDescriptor codec_descriptors[] = {
+ /* video codecs */
+ {
+ .id = AV_CODEC_ID_MPEG1VIDEO,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "mpeg1video",
+ .long_name = NULL_IF_CONFIG_SMALL("MPEG-1 video"),
+ .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+ },
+ {
+ .id = AV_CODEC_ID_MPEG2VIDEO,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "mpeg2video",
+ .long_name = NULL_IF_CONFIG_SMALL("MPEG-2 video"),
+ .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+ .profiles = NULL_IF_CONFIG_SMALL(ff_mpeg2_video_profiles),
+ },
+#if FF_API_XVMC
+ {
+ .id = AV_CODEC_ID_MPEG2VIDEO_XVMC,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "mpegvideo_xvmc",
+ .long_name = NULL_IF_CONFIG_SMALL("MPEG-1/2 video XvMC (X-Video Motion Compensation)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+#endif /* FF_API_XVMC */
+ {
+ .id = AV_CODEC_ID_H261,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "h261",
+ .long_name = NULL_IF_CONFIG_SMALL("H.261"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_H263,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "h263",
+ .long_name = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996, H.263+ / H.263-1998 / H.263 version 2"),
+ .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+ },
+ {
+ .id = AV_CODEC_ID_RV10,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "rv10",
+ .long_name = NULL_IF_CONFIG_SMALL("RealVideo 1.0"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_RV20,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "rv20",
+ .long_name = NULL_IF_CONFIG_SMALL("RealVideo 2.0"),
+ .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+ },
+ {
+ .id = AV_CODEC_ID_MJPEG,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "mjpeg",
+ .long_name = NULL_IF_CONFIG_SMALL("Motion JPEG"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ .mime_types= MT("image/jpeg"),
+ },
+ {
+ .id = AV_CODEC_ID_MJPEGB,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "mjpegb",
+ .long_name = NULL_IF_CONFIG_SMALL("Apple MJPEG-B"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_MPEG4,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "mpeg4",
+ .long_name = NULL_IF_CONFIG_SMALL("MPEG-4 part 2"),
+ .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+ .profiles = NULL_IF_CONFIG_SMALL(ff_mpeg4_video_profiles),
+ },
+ {
+ .id = AV_CODEC_ID_RAWVIDEO,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "rawvideo",
+ .long_name = NULL_IF_CONFIG_SMALL("raw video"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_MSMPEG4V1,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "msmpeg4v1",
+ .long_name = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 1"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_MSMPEG4V2,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "msmpeg4v2",
+ .long_name = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_MSMPEG4V3,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "msmpeg4v3",
+ .long_name = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_WMV1,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "wmv1",
+ .long_name = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_WMV2,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "wmv2",
+ .long_name = NULL_IF_CONFIG_SMALL("Windows Media Video 8"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_H263P,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "h263p",
+ .long_name = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
+ .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+ },
+ {
+ .id = AV_CODEC_ID_H263I,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "h263i",
+ .long_name = NULL_IF_CONFIG_SMALL("Intel H.263"),
+ .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+ },
+ {
+ .id = AV_CODEC_ID_FLV1,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "flv1",
+ .long_name = NULL_IF_CONFIG_SMALL("FLV / Sorenson Spark / Sorenson H.263 (Flash Video)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_SVQ1,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "svq1",
+ .long_name = NULL_IF_CONFIG_SMALL("Sorenson Vector Quantizer 1 / Sorenson Video 1 / SVQ1"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_SVQ3,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "svq3",
+ .long_name = NULL_IF_CONFIG_SMALL("Sorenson Vector Quantizer 3 / Sorenson Video 3 / SVQ3"),
+ .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+ },
+ {
+ .id = AV_CODEC_ID_DVVIDEO,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "dvvideo",
+ .long_name = NULL_IF_CONFIG_SMALL("DV (Digital Video)"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_HUFFYUV,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "huffyuv",
+ .long_name = NULL_IF_CONFIG_SMALL("HuffYUV"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_CYUV,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "cyuv",
+ .long_name = NULL_IF_CONFIG_SMALL("Creative YUV (CYUV)"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_H264,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "h264",
+ .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
+ .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS | AV_CODEC_PROP_REORDER,
+ .profiles = NULL_IF_CONFIG_SMALL(ff_h264_profiles),
+ },
+ {
+ .id = AV_CODEC_ID_INDEO3,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "indeo3",
+ .long_name = NULL_IF_CONFIG_SMALL("Intel Indeo 3"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_VP3,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "vp3",
+ .long_name = NULL_IF_CONFIG_SMALL("On2 VP3"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_THEORA,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "theora",
+ .long_name = NULL_IF_CONFIG_SMALL("Theora"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ASV1,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "asv1",
+ .long_name = NULL_IF_CONFIG_SMALL("ASUS V1"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ASV2,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "asv2",
+ .long_name = NULL_IF_CONFIG_SMALL("ASUS V2"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_FFV1,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "ffv1",
+ .long_name = NULL_IF_CONFIG_SMALL("FFmpeg video codec #1"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_4XM,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "4xm",
+ .long_name = NULL_IF_CONFIG_SMALL("4X Movie"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_VCR1,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "vcr1",
+ .long_name = NULL_IF_CONFIG_SMALL("ATI VCR1"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_CLJR,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "cljr",
+ .long_name = NULL_IF_CONFIG_SMALL("Cirrus Logic AccuPak"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_MDEC,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "mdec",
+ .long_name = NULL_IF_CONFIG_SMALL("Sony PlayStation MDEC (Motion DECoder)"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ROQ,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "roq",
+ .long_name = NULL_IF_CONFIG_SMALL("id RoQ video"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_INTERPLAY_VIDEO,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "interplayvideo",
+ .long_name = NULL_IF_CONFIG_SMALL("Interplay MVE video"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_XAN_WC3,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "xan_wc3",
+ .long_name = NULL_IF_CONFIG_SMALL("Wing Commander III / Xan"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_XAN_WC4,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "xan_wc4",
+ .long_name = NULL_IF_CONFIG_SMALL("Wing Commander IV / Xxan"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_RPZA,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "rpza",
+ .long_name = NULL_IF_CONFIG_SMALL("QuickTime video (RPZA)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_CINEPAK,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "cinepak",
+ .long_name = NULL_IF_CONFIG_SMALL("Cinepak"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_WS_VQA,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "ws_vqa",
+ .long_name = NULL_IF_CONFIG_SMALL("Westwood Studios VQA (Vector Quantized Animation) video"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_MSRLE,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "msrle",
+ .long_name = NULL_IF_CONFIG_SMALL("Microsoft RLE"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_MSVIDEO1,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "msvideo1",
+ .long_name = NULL_IF_CONFIG_SMALL("Microsoft Video 1"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_IDCIN,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "idcin",
+ .long_name = NULL_IF_CONFIG_SMALL("id Quake II CIN video"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_8BPS,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "8bps",
+ .long_name = NULL_IF_CONFIG_SMALL("QuickTime 8BPS video"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_SMC,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "smc",
+ .long_name = NULL_IF_CONFIG_SMALL("QuickTime Graphics (SMC)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_FLIC,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "flic",
+ .long_name = NULL_IF_CONFIG_SMALL("Autodesk Animator Flic video"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_TRUEMOTION1,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "truemotion1",
+ .long_name = NULL_IF_CONFIG_SMALL("Duck TrueMotion 1.0"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_VMDVIDEO,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "vmdvideo",
+ .long_name = NULL_IF_CONFIG_SMALL("Sierra VMD video"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_MSZH,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "mszh",
+ .long_name = NULL_IF_CONFIG_SMALL("LCL (LossLess Codec Library) MSZH"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_ZLIB,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "zlib",
+ .long_name = NULL_IF_CONFIG_SMALL("LCL (LossLess Codec Library) ZLIB"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_QTRLE,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "qtrle",
+ .long_name = NULL_IF_CONFIG_SMALL("QuickTime Animation (RLE) video"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_SNOW,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "snow",
+ .long_name = NULL_IF_CONFIG_SMALL("Snow"),
+ .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_TSCC,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "tscc",
+ .long_name = NULL_IF_CONFIG_SMALL("TechSmith Screen Capture Codec"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_ULTI,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "ulti",
+ .long_name = NULL_IF_CONFIG_SMALL("IBM UltiMotion"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_QDRAW,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "qdraw",
+ .long_name = NULL_IF_CONFIG_SMALL("Apple QuickDraw"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_VIXL,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "vixl",
+ .long_name = NULL_IF_CONFIG_SMALL("Miro VideoXL"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_QPEG,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "qpeg",
+ .long_name = NULL_IF_CONFIG_SMALL("Q-team QPEG"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_FFVHUFF,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "ffvhuff",
+ .long_name = NULL_IF_CONFIG_SMALL("Huffyuv FFmpeg variant"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_RV30,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "rv30",
+ .long_name = NULL_IF_CONFIG_SMALL("RealVideo 3.0"),
+ .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+ },
+ {
+ .id = AV_CODEC_ID_RV40,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "rv40",
+ .long_name = NULL_IF_CONFIG_SMALL("RealVideo 4.0"),
+ .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+ },
+ {
+ .id = AV_CODEC_ID_VC1,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "vc1",
+ .long_name = NULL_IF_CONFIG_SMALL("SMPTE VC-1"),
+ .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+ .profiles = NULL_IF_CONFIG_SMALL(ff_vc1_profiles),
+ },
+ {
+ .id = AV_CODEC_ID_WMV3,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "wmv3",
+ .long_name = NULL_IF_CONFIG_SMALL("Windows Media Video 9"),
+ .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+ .profiles = NULL_IF_CONFIG_SMALL(ff_vc1_profiles),
+ },
+ {
+ .id = AV_CODEC_ID_LOCO,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "loco",
+ .long_name = NULL_IF_CONFIG_SMALL("LOCO"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_WNV1,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "wnv1",
+ .long_name = NULL_IF_CONFIG_SMALL("Winnov WNV1"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_AASC,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "aasc",
+ .long_name = NULL_IF_CONFIG_SMALL("Autodesk RLE"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_INDEO2,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "indeo2",
+ .long_name = NULL_IF_CONFIG_SMALL("Intel Indeo 2"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_FRAPS,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "fraps",
+ .long_name = NULL_IF_CONFIG_SMALL("Fraps"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_TRUEMOTION2,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "truemotion2",
+ .long_name = NULL_IF_CONFIG_SMALL("Duck TrueMotion 2.0"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_BMP,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "bmp",
+ .long_name = NULL_IF_CONFIG_SMALL("BMP (Windows and OS/2 bitmap)"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ .mime_types= MT("image/x-ms-bmp"),
+ },
+ {
+ .id = AV_CODEC_ID_CSCD,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "cscd",
+ .long_name = NULL_IF_CONFIG_SMALL("CamStudio"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_MMVIDEO,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "mmvideo",
+ .long_name = NULL_IF_CONFIG_SMALL("American Laser Games MM Video"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ZMBV,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "zmbv",
+ .long_name = NULL_IF_CONFIG_SMALL("Zip Motion Blocks Video"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_AVS,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "avs",
+ .long_name = NULL_IF_CONFIG_SMALL("AVS (Audio Video Standard) video"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_SMACKVIDEO,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "smackvideo",
+ .long_name = NULL_IF_CONFIG_SMALL("Smacker video"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_NUV,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "nuv",
+ .long_name = NULL_IF_CONFIG_SMALL("NuppelVideo/RTJPEG"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_KMVC,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "kmvc",
+ .long_name = NULL_IF_CONFIG_SMALL("Karl Morton's video codec"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_FLASHSV,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "flashsv",
+ .long_name = NULL_IF_CONFIG_SMALL("Flash Screen Video v1"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_CAVS,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "cavs",
+ .long_name = NULL_IF_CONFIG_SMALL("Chinese AVS (Audio Video Standard) (AVS1-P2, JiZhun profile)"),
+ .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+ },
+ {
+ .id = AV_CODEC_ID_JPEG2000,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "jpeg2000",
+ .long_name = NULL_IF_CONFIG_SMALL("JPEG 2000"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY |
+ AV_CODEC_PROP_LOSSLESS,
+ .mime_types= MT("image/jp2"),
+ .profiles = NULL_IF_CONFIG_SMALL(ff_jpeg2000_profiles),
+ },
+ {
+ .id = AV_CODEC_ID_VMNC,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "vmnc",
+ .long_name = NULL_IF_CONFIG_SMALL("VMware Screen Codec / VMware Video"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_VP5,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "vp5",
+ .long_name = NULL_IF_CONFIG_SMALL("On2 VP5"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_VP6,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "vp6",
+ .long_name = NULL_IF_CONFIG_SMALL("On2 VP6"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_VP6F,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "vp6f",
+ .long_name = NULL_IF_CONFIG_SMALL("On2 VP6 (Flash version)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_DSICINVIDEO,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "dsicinvideo",
+ .long_name = NULL_IF_CONFIG_SMALL("Delphine Software International CIN video"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_TIERTEXSEQVIDEO,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "tiertexseqvideo",
+ .long_name = NULL_IF_CONFIG_SMALL("Tiertex Limited SEQ video"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_DXA,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "dxa",
+ .long_name = NULL_IF_CONFIG_SMALL("Feeble Files/ScummVM DXA"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_DNXHD,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "dnxhd",
+ .long_name = NULL_IF_CONFIG_SMALL("VC3/DNxHD"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_THP,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "thp",
+ .long_name = NULL_IF_CONFIG_SMALL("Nintendo Gamecube THP video"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_C93,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "c93",
+ .long_name = NULL_IF_CONFIG_SMALL("Interplay C93"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_BETHSOFTVID,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "bethsoftvid",
+ .long_name = NULL_IF_CONFIG_SMALL("Bethesda VID video"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_VP6A,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "vp6a",
+ .long_name = NULL_IF_CONFIG_SMALL("On2 VP6 (Flash version, with alpha channel)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_AMV,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "amv",
+ .long_name = NULL_IF_CONFIG_SMALL("AMV Video"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_VB,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "vb",
+ .long_name = NULL_IF_CONFIG_SMALL("Beam Software VB"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_INDEO4,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "indeo4",
+ .long_name = NULL_IF_CONFIG_SMALL("Intel Indeo Video Interactive 4"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_INDEO5,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "indeo5",
+ .long_name = NULL_IF_CONFIG_SMALL("Intel Indeo Video Interactive 5"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_MIMIC,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "mimic",
+ .long_name = NULL_IF_CONFIG_SMALL("Mimic"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_RL2,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "rl2",
+ .long_name = NULL_IF_CONFIG_SMALL("RL2 video"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ESCAPE124,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "escape124",
+ .long_name = NULL_IF_CONFIG_SMALL("Escape 124"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_DAALA,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "daala",
+ .long_name = NULL_IF_CONFIG_SMALL("Daala"),
+ .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_DIRAC,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "dirac",
+ .long_name = NULL_IF_CONFIG_SMALL("Dirac"),
+ .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS | AV_CODEC_PROP_REORDER,
+ },
+ {
+ .id = AV_CODEC_ID_BFI,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "bfi",
+ .long_name = NULL_IF_CONFIG_SMALL("Brute Force & Ignorance"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_CMV,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "cmv",
+ .long_name = NULL_IF_CONFIG_SMALL("Electronic Arts CMV video"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_MOTIONPIXELS,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "motionpixels",
+ .long_name = NULL_IF_CONFIG_SMALL("Motion Pixels video"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_TGV,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "tgv",
+ .long_name = NULL_IF_CONFIG_SMALL("Electronic Arts TGV video"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_TGQ,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "tgq",
+ .long_name = NULL_IF_CONFIG_SMALL("Electronic Arts TGQ video"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_TQI,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "tqi",
+ .long_name = NULL_IF_CONFIG_SMALL("Electronic Arts TQI video"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_AURA,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "aura",
+ .long_name = NULL_IF_CONFIG_SMALL("Auravision AURA"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_AURA2,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "aura2",
+ .long_name = NULL_IF_CONFIG_SMALL("Auravision Aura 2"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_V210X,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "v210x",
+ .long_name = NULL_IF_CONFIG_SMALL("Uncompressed 4:2:2 10-bit"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_TMV,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "tmv",
+ .long_name = NULL_IF_CONFIG_SMALL("8088flex TMV"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_V210,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "v210",
+ .long_name = NULL_IF_CONFIG_SMALL("Uncompressed 4:2:2 10-bit"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_MAD,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "mad",
+ .long_name = NULL_IF_CONFIG_SMALL("Electronic Arts Madcow Video"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_FRWU,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "frwu",
+ .long_name = NULL_IF_CONFIG_SMALL("Forward Uncompressed"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_FLASHSV2,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "flashsv2",
+ .long_name = NULL_IF_CONFIG_SMALL("Flash Screen Video v2"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_CDGRAPHICS,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "cdgraphics",
+ .long_name = NULL_IF_CONFIG_SMALL("CD Graphics video"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_R210,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "r210",
+ .long_name = NULL_IF_CONFIG_SMALL("Uncompressed RGB 10-bit"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_ANM,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "anm",
+ .long_name = NULL_IF_CONFIG_SMALL("Deluxe Paint Animation"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_BINKVIDEO,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "binkvideo",
+ .long_name = NULL_IF_CONFIG_SMALL("Bink video"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_IFF_ILBM,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "iff_ilbm",
+ .long_name = NULL_IF_CONFIG_SMALL("IFF ACBM/ANIM/DEEP/ILBM/PBM/RGB8/RGBN"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_KGV1,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "kgv1",
+ .long_name = NULL_IF_CONFIG_SMALL("Kega Game Video"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_YOP,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "yop",
+ .long_name = NULL_IF_CONFIG_SMALL("Psygnosis YOP Video"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_VP8,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "vp8",
+ .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_VP9,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "vp9",
+ .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
+ .props = AV_CODEC_PROP_LOSSY,
+ .profiles = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
+ },
+ {
+ .id = AV_CODEC_ID_PICTOR,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "pictor",
+ .long_name = NULL_IF_CONFIG_SMALL("Pictor/PC Paint"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_A64_MULTI,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "a64_multi",
+ .long_name = NULL_IF_CONFIG_SMALL("Multicolor charset for Commodore 64"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_A64_MULTI5,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "a64_multi5",
+ .long_name = NULL_IF_CONFIG_SMALL("Multicolor charset for Commodore 64, extended with 5th color (colram)"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_R10K,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "r10k",
+ .long_name = NULL_IF_CONFIG_SMALL("AJA Kona 10-bit RGB Codec"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_M101,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "m101",
+ .long_name = NULL_IF_CONFIG_SMALL("Matrox Uncompressed SD"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_MVC1,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "mvc1",
+ .long_name = NULL_IF_CONFIG_SMALL("Silicon Graphics Motion Video Compressor 1"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_MVC2,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "mvc2",
+ .long_name = NULL_IF_CONFIG_SMALL("Silicon Graphics Motion Video Compressor 2"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_MXPEG,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "mxpeg",
+ .long_name = NULL_IF_CONFIG_SMALL("Mobotix MxPEG video"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_LAGARITH,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "lagarith",
+ .long_name = NULL_IF_CONFIG_SMALL("Lagarith lossless"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_PRORES,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "prores",
+ .long_name = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_JV,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "jv",
+ .long_name = NULL_IF_CONFIG_SMALL("Bitmap Brothers JV video"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_DFA,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "dfa",
+ .long_name = NULL_IF_CONFIG_SMALL("Chronomaster DFA"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_UTVIDEO,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "utvideo",
+ .long_name = NULL_IF_CONFIG_SMALL("Ut Video"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_BMV_VIDEO,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "bmv_video",
+ .long_name = NULL_IF_CONFIG_SMALL("Discworld II BMV video"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_VBLE,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "vble",
+ .long_name = NULL_IF_CONFIG_SMALL("VBLE Lossless Codec"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_DXTORY,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "dxtory",
+ .long_name = NULL_IF_CONFIG_SMALL("Dxtory"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_V410,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "v410",
+ .long_name = NULL_IF_CONFIG_SMALL("Uncompressed 4:4:4 10-bit"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_CDXL,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "cdxl",
+ .long_name = NULL_IF_CONFIG_SMALL("Commodore CDXL video"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ZEROCODEC,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "zerocodec",
+ .long_name = NULL_IF_CONFIG_SMALL("ZeroCodec Lossless Video"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_MSS1,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "mss1",
+ .long_name = NULL_IF_CONFIG_SMALL("MS Screen 1"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_MSA1,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "msa1",
+ .long_name = NULL_IF_CONFIG_SMALL("MS ATC Screen"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_TSCC2,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "tscc2",
+ .long_name = NULL_IF_CONFIG_SMALL("TechSmith Screen Codec 2"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_MTS2,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "mts2",
+ .long_name = NULL_IF_CONFIG_SMALL("MS Expression Encoder Screen"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_CLLC,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "cllc",
+ .long_name = NULL_IF_CONFIG_SMALL("Canopus Lossless Codec"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_MSS2,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "mss2",
+ .long_name = NULL_IF_CONFIG_SMALL("MS Windows Media Video V9 Screen"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_AIC,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "aic",
+ .long_name = NULL_IF_CONFIG_SMALL("Apple Intermediate Codec"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_Y41P,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "y41p",
+ .long_name = NULL_IF_CONFIG_SMALL("Uncompressed YUV 4:1:1 12-bit"),
+ .props = AV_CODEC_PROP_INTRA_ONLY,
+ },
+ {
+ .id = AV_CODEC_ID_ESCAPE130,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "escape130",
+ .long_name = NULL_IF_CONFIG_SMALL("Escape 130"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_AVRP,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "avrp",
+ .long_name = NULL_IF_CONFIG_SMALL("Avid 1:1 10-bit RGB Packer"),
+ .props = AV_CODEC_PROP_INTRA_ONLY,
+ },
+ {
+ .id = AV_CODEC_ID_012V,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "012v",
+ .long_name = NULL_IF_CONFIG_SMALL("Uncompressed 4:2:2 10-bit"),
+ .props = AV_CODEC_PROP_INTRA_ONLY,
+ },
+ {
+ .id = AV_CODEC_ID_AVUI,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "avui",
+ .long_name = NULL_IF_CONFIG_SMALL("Avid Meridien Uncompressed"),
+ .props = AV_CODEC_PROP_INTRA_ONLY,
+ },
+ {
+ .id = AV_CODEC_ID_AYUV,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "ayuv",
+ .long_name = NULL_IF_CONFIG_SMALL("Uncompressed packed MS 4:4:4:4"),
+ .props = AV_CODEC_PROP_INTRA_ONLY,
+ },
+ {
+ .id = AV_CODEC_ID_TARGA_Y216,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "targa_y216",
+ .long_name = NULL_IF_CONFIG_SMALL("Pinnacle TARGA CineWave YUV16"),
+ .props = AV_CODEC_PROP_INTRA_ONLY,
+ },
+ {
+ .id = AV_CODEC_ID_V308,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "v308",
+ .long_name = NULL_IF_CONFIG_SMALL("Uncompressed packed 4:4:4"),
+ .props = AV_CODEC_PROP_INTRA_ONLY,
+ },
+ {
+ .id = AV_CODEC_ID_V408,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "v408",
+ .long_name = NULL_IF_CONFIG_SMALL("Uncompressed packed QT 4:4:4:4"),
+ .props = AV_CODEC_PROP_INTRA_ONLY,
+ },
+ {
+ .id = AV_CODEC_ID_YUV4,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "yuv4",
+ .long_name = NULL_IF_CONFIG_SMALL("Uncompressed packed 4:2:0"),
+ .props = AV_CODEC_PROP_INTRA_ONLY,
+ },
+ {
+ .id = AV_CODEC_ID_AVRN,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "avrn",
+ .long_name = NULL_IF_CONFIG_SMALL("Avid AVI Codec"),
+ },
+ {
+ .id = AV_CODEC_ID_CPIA,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "cpia",
+ .long_name = NULL_IF_CONFIG_SMALL("CPiA video format"),
+ },
+ {
+ .id = AV_CODEC_ID_XFACE,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "xface",
+ .long_name = NULL_IF_CONFIG_SMALL("X-face image"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_SMVJPEG,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "smvjpeg",
+ .long_name = NULL_IF_CONFIG_SMALL("Sigmatel Motion Video"),
+ },
+
+ {
+ .id = AV_CODEC_ID_G2M,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "g2m",
+ .long_name = NULL_IF_CONFIG_SMALL("Go2Meeting"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_HNM4_VIDEO,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "hnm4video",
+ .long_name = NULL_IF_CONFIG_SMALL("HNM 4 video"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_HEVC,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "hevc",
+ .long_name = NULL_IF_CONFIG_SMALL("H.265 / HEVC (High Efficiency Video Coding)"),
+ .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
+ .profiles = NULL_IF_CONFIG_SMALL(ff_hevc_profiles),
+ },
+ {
+ .id = AV_CODEC_ID_FIC,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "fic",
+ .long_name = NULL_IF_CONFIG_SMALL("Mirillis FIC"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_PAF_VIDEO,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "paf_video",
+ .long_name = NULL_IF_CONFIG_SMALL("Amazing Studio Packed Animation File Video"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_VP7,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "vp7",
+ .long_name = NULL_IF_CONFIG_SMALL("On2 VP7"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_SANM,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "sanm",
+ .long_name = NULL_IF_CONFIG_SMALL("LucasArts SANM/SMUSH video"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_SGIRLE,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "sgirle",
+ .long_name = NULL_IF_CONFIG_SMALL("SGI RLE 8-bit"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_HQX,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "hqx",
+ .long_name = NULL_IF_CONFIG_SMALL("Canopus HQX"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_HQ_HQA,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "hq_hqa",
+ .long_name = NULL_IF_CONFIG_SMALL("Canopus HQ/HQA"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_HAP,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "hap",
+ .long_name = NULL_IF_CONFIG_SMALL("Vidvox Hap decoder"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_DXV,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "dxv",
+ .long_name = NULL_IF_CONFIG_SMALL("Resolume DXV"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_SCREENPRESSO,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "screenpresso",
+ .long_name = NULL_IF_CONFIG_SMALL("Screenpresso"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_WRAPPED_AVFRAME,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "wrapped_avframe",
+ .long_name = NULL_IF_CONFIG_SMALL("AVFrame to AVPacket passthrough"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_RSCC,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "rscc",
+ .long_name = NULL_IF_CONFIG_SMALL("innoHeim/Rsupport Screen Capture Codec"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+
+ /* image codecs */
+ {
+ .id = AV_CODEC_ID_ALIAS_PIX,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "alias_pix",
+ .long_name = NULL_IF_CONFIG_SMALL("Alias/Wavefront PIX image"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_ANSI,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "ansi",
+ .long_name = NULL_IF_CONFIG_SMALL("ASCII/ANSI art"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_BRENDER_PIX,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "brender_pix",
+ .long_name = NULL_IF_CONFIG_SMALL("BRender PIX image"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_DDS,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "dds",
+ .long_name = NULL_IF_CONFIG_SMALL("DirectDraw Surface image decoder"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY |
+ AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_DPX,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "dpx",
+ .long_name = NULL_IF_CONFIG_SMALL("DPX (Digital Picture Exchange) image"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_EXR,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "exr",
+ .long_name = NULL_IF_CONFIG_SMALL("OpenEXR image"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY |
+ AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_GIF,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "gif",
+ .long_name = NULL_IF_CONFIG_SMALL("GIF (Graphics Interchange Format)"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ .mime_types= MT("image/gif"),
+ },
+ {
+ .id = AV_CODEC_ID_JPEGLS,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "jpegls",
+ .long_name = NULL_IF_CONFIG_SMALL("JPEG-LS"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY |
+ AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_LJPEG,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "ljpeg",
+ .long_name = NULL_IF_CONFIG_SMALL("Lossless JPEG"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_PAM,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "pam",
+ .long_name = NULL_IF_CONFIG_SMALL("PAM (Portable AnyMap) image"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ .mime_types= MT("image/x-portable-pixmap"),
+ },
+ {
+ .id = AV_CODEC_ID_PBM,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "pbm",
+ .long_name = NULL_IF_CONFIG_SMALL("PBM (Portable BitMap) image"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_PCX,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "pcx",
+ .long_name = NULL_IF_CONFIG_SMALL("PC Paintbrush PCX image"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ .mime_types= MT("image/x-pcx"),
+ },
+ {
+ .id = AV_CODEC_ID_PGM,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "pgm",
+ .long_name = NULL_IF_CONFIG_SMALL("PGM (Portable GrayMap) image"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_PGMYUV,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "pgmyuv",
+ .long_name = NULL_IF_CONFIG_SMALL("PGMYUV (Portable GrayMap YUV) image"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_PNG,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "png",
+ .long_name = NULL_IF_CONFIG_SMALL("PNG (Portable Network Graphics) image"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ .mime_types= MT("image/png"),
+ },
+ {
+ .id = AV_CODEC_ID_PPM,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "ppm",
+ .long_name = NULL_IF_CONFIG_SMALL("PPM (Portable PixelMap) image"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_PTX,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "ptx",
+ .long_name = NULL_IF_CONFIG_SMALL("V.Flash PTX image"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_SGI,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "sgi",
+ .long_name = NULL_IF_CONFIG_SMALL("SGI image"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_SP5X,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "sp5x",
+ .long_name = NULL_IF_CONFIG_SMALL("Sunplus JPEG (SP5X)"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_SUNRAST,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "sunrast",
+ .long_name = NULL_IF_CONFIG_SMALL("Sun Rasterfile image"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_TARGA,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "targa",
+ .long_name = NULL_IF_CONFIG_SMALL("Truevision Targa image"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ .mime_types= MT("image/x-targa", "image/x-tga"),
+ },
+ {
+ .id = AV_CODEC_ID_TDSC,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "tdsc",
+ .long_name = NULL_IF_CONFIG_SMALL("TDSC"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_TIFF,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "tiff",
+ .long_name = NULL_IF_CONFIG_SMALL("TIFF image"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ .mime_types= MT("image/tiff"),
+ },
+ {
+ .id = AV_CODEC_ID_TXD,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "txd",
+ .long_name = NULL_IF_CONFIG_SMALL("Renderware TXD (TeXture Dictionary) image"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_VC1IMAGE,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "vc1image",
+ .long_name = NULL_IF_CONFIG_SMALL("Windows Media Video 9 Image v2"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_WEBP,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "webp",
+ .long_name = NULL_IF_CONFIG_SMALL("WebP"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY |
+ AV_CODEC_PROP_LOSSLESS,
+ .mime_types= MT("image/webp"),
+ },
+ {
+ .id = AV_CODEC_ID_WMV3IMAGE,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "wmv3image",
+ .long_name = NULL_IF_CONFIG_SMALL("Windows Media Video 9 Image"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_XBM,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "xbm",
+ .long_name = NULL_IF_CONFIG_SMALL("XBM (X BitMap) image"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_XWD,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "xwd",
+ .long_name = NULL_IF_CONFIG_SMALL("XWD (X Window Dump) image"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ .mime_types= MT("image/x-xwindowdump"),
+ },
+ {
+ .id = AV_CODEC_ID_APNG,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "apng",
+ .long_name = NULL_IF_CONFIG_SMALL("APNG (Animated Portable Network Graphics) image"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ .mime_types= MT("image/png"),
+ },
+ {
+ .id = AV_CODEC_ID_CFHD,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "cfhd",
+ .long_name = NULL_IF_CONFIG_SMALL("Cineform HD"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_TRUEMOTION2RT,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "truemotion2rt",
+ .long_name = NULL_IF_CONFIG_SMALL("Duck TrueMotion 2.0 Real Time"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_MAGICYUV,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "magicyuv",
+ .long_name = NULL_IF_CONFIG_SMALL("MagicYUV Lossless Video"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_SHEERVIDEO,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "sheervideo",
+ .long_name = NULL_IF_CONFIG_SMALL("BitJazz SheerVideo"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_YLC,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "ylc",
+ .long_name = NULL_IF_CONFIG_SMALL("YUY2 Lossless Codec"),
+ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+ },
+
+ /* various PCM "codecs" */
+ {
+ .id = AV_CODEC_ID_PCM_S16LE,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "pcm_s16le",
+ .long_name = NULL_IF_CONFIG_SMALL("PCM signed 16-bit little-endian"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_PCM_S16BE,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "pcm_s16be",
+ .long_name = NULL_IF_CONFIG_SMALL("PCM signed 16-bit big-endian"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_PCM_U16LE,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "pcm_u16le",
+ .long_name = NULL_IF_CONFIG_SMALL("PCM unsigned 16-bit little-endian"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_PCM_U16BE,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "pcm_u16be",
+ .long_name = NULL_IF_CONFIG_SMALL("PCM unsigned 16-bit big-endian"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_PCM_S8,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "pcm_s8",
+ .long_name = NULL_IF_CONFIG_SMALL("PCM signed 8-bit"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_PCM_U8,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "pcm_u8",
+ .long_name = NULL_IF_CONFIG_SMALL("PCM unsigned 8-bit"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_PCM_MULAW,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "pcm_mulaw",
+ .long_name = NULL_IF_CONFIG_SMALL("PCM mu-law / G.711 mu-law"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_PCM_ALAW,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "pcm_alaw",
+ .long_name = NULL_IF_CONFIG_SMALL("PCM A-law / G.711 A-law"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_PCM_S32LE,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "pcm_s32le",
+ .long_name = NULL_IF_CONFIG_SMALL("PCM signed 32-bit little-endian"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_PCM_S32BE,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "pcm_s32be",
+ .long_name = NULL_IF_CONFIG_SMALL("PCM signed 32-bit big-endian"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_PCM_U32LE,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "pcm_u32le",
+ .long_name = NULL_IF_CONFIG_SMALL("PCM unsigned 32-bit little-endian"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_PCM_U32BE,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "pcm_u32be",
+ .long_name = NULL_IF_CONFIG_SMALL("PCM unsigned 32-bit big-endian"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_PCM_S24LE,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "pcm_s24le",
+ .long_name = NULL_IF_CONFIG_SMALL("PCM signed 24-bit little-endian"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_PCM_S24BE,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "pcm_s24be",
+ .long_name = NULL_IF_CONFIG_SMALL("PCM signed 24-bit big-endian"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_PCM_U24LE,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "pcm_u24le",
+ .long_name = NULL_IF_CONFIG_SMALL("PCM unsigned 24-bit little-endian"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_PCM_U24BE,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "pcm_u24be",
+ .long_name = NULL_IF_CONFIG_SMALL("PCM unsigned 24-bit big-endian"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_PCM_S24DAUD,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "pcm_s24daud",
+ .long_name = NULL_IF_CONFIG_SMALL("PCM D-Cinema audio signed 24-bit"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_PCM_ZORK,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "pcm_zork",
+ .long_name = NULL_IF_CONFIG_SMALL("PCM Zork"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_PCM_S16BE_PLANAR,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "pcm_s16be_planar",
+ .long_name = NULL_IF_CONFIG_SMALL("PCM signed 16-bit big-endian planar"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_PCM_S16LE_PLANAR,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "pcm_s16le_planar",
+ .long_name = NULL_IF_CONFIG_SMALL("PCM signed 16-bit little-endian planar"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_PCM_S24LE_PLANAR,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "pcm_s24le_planar",
+ .long_name = NULL_IF_CONFIG_SMALL("PCM signed 24-bit little-endian planar"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_PCM_S32LE_PLANAR,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "pcm_s32le_planar",
+ .long_name = NULL_IF_CONFIG_SMALL("PCM signed 32-bit little-endian planar"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_PCM_DVD,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "pcm_dvd",
+ .long_name = NULL_IF_CONFIG_SMALL("PCM signed 20|24-bit big-endian"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_PCM_F32BE,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "pcm_f32be",
+ .long_name = NULL_IF_CONFIG_SMALL("PCM 32-bit floating point big-endian"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_PCM_F32LE,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "pcm_f32le",
+ .long_name = NULL_IF_CONFIG_SMALL("PCM 32-bit floating point little-endian"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_PCM_F64BE,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "pcm_f64be",
+ .long_name = NULL_IF_CONFIG_SMALL("PCM 64-bit floating point big-endian"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_PCM_F64LE,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "pcm_f64le",
+ .long_name = NULL_IF_CONFIG_SMALL("PCM 64-bit floating point little-endian"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_PCM_BLURAY,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "pcm_bluray",
+ .long_name = NULL_IF_CONFIG_SMALL("PCM signed 16|20|24-bit big-endian for Blu-ray media"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_PCM_LXF,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "pcm_lxf",
+ .long_name = NULL_IF_CONFIG_SMALL("PCM signed 20-bit little-endian planar"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_S302M,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "s302m",
+ .long_name = NULL_IF_CONFIG_SMALL("SMPTE 302M"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_PCM_S8_PLANAR,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "pcm_s8_planar",
+ .long_name = NULL_IF_CONFIG_SMALL("PCM signed 8-bit planar"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+
+ /* various ADPCM codecs */
+ {
+ .id = AV_CODEC_ID_ADPCM_IMA_QT,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_ima_qt",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA QuickTime"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_IMA_WAV,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_ima_wav",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA WAV"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_IMA_DK3,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_ima_dk3",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Duck DK3"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_IMA_DK4,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_ima_dk4",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Duck DK4"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_IMA_WS,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_ima_ws",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Westwood"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_IMA_SMJPEG,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_ima_smjpeg",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Loki SDL MJPEG"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_MS,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_ms",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM Microsoft"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_4XM,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_4xm",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM 4X Movie"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_XA,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_xa",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM CDROM XA"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_ADX,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_adx",
+ .long_name = NULL_IF_CONFIG_SMALL("SEGA CRI ADX ADPCM"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_EA,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_ea",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM Electronic Arts"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_G726,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_g726",
+ .long_name = NULL_IF_CONFIG_SMALL("G.726 ADPCM"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_CT,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_ct",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM Creative Technology"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_SWF,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_swf",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM Shockwave Flash"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_YAMAHA,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_yamaha",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM Yamaha"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_SBPRO_4,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_sbpro_4",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM Sound Blaster Pro 4-bit"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_SBPRO_3,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_sbpro_3",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM Sound Blaster Pro 2.6-bit"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_SBPRO_2,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_sbpro_2",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM Sound Blaster Pro 2-bit"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_THP,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_thp",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM Nintendo THP"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_THP_LE,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_thp_le",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM Nintendo THP (Little-Endian)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_IMA_AMV,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_ima_amv",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA AMV"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_EA_R1,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_ea_r1",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM Electronic Arts R1"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_EA_R3,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_ea_r3",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM Electronic Arts R3"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_EA_R2,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_ea_r2",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM Electronic Arts R2"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_IMA_EA_SEAD,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_ima_ea_sead",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Electronic Arts SEAD"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_IMA_EA_EACS,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_ima_ea_eacs",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Electronic Arts EACS"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_EA_XAS,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_ea_xas",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM Electronic Arts XAS"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_EA_MAXIS_XA,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_ea_maxis_xa",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM Electronic Arts Maxis CDROM XA"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_IMA_ISS,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_ima_iss",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Funcom ISS"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_G722,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_g722",
+ .long_name = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_IMA_APC,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_ima_apc",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA CRYO APC"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_AFC,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_afc",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM Nintendo Gamecube AFC"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_IMA_OKI,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_ima_oki",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Dialogic OKI"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_DTK,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_dtk",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM Nintendo Gamecube DTK"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_IMA_RAD,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_ima_rad",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Radical"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_G726LE,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_g726le",
+ .long_name = NULL_IF_CONFIG_SMALL("G.726 ADPCM little-endian"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_VIMA,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_vima",
+ .long_name = NULL_IF_CONFIG_SMALL("LucasArts VIMA audio"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_PSX,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_psx",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM Playstation"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_AICA,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_aica",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM Yamaha AICA"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_IMA_DAT4,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_ima_dat4",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Eurocom DAT4"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+
+ /* AMR */
+ {
+ .id = AV_CODEC_ID_AMR_NB,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "amr_nb",
+ .long_name = NULL_IF_CONFIG_SMALL("AMR-NB (Adaptive Multi-Rate NarrowBand)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_AMR_WB,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "amr_wb",
+ .long_name = NULL_IF_CONFIG_SMALL("AMR-WB (Adaptive Multi-Rate WideBand)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+
+ /* RealAudio codecs*/
+ {
+ .id = AV_CODEC_ID_RA_144,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "ra_144",
+ .long_name = NULL_IF_CONFIG_SMALL("RealAudio 1.0 (14.4K)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_RA_288,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "ra_288",
+ .long_name = NULL_IF_CONFIG_SMALL("RealAudio 2.0 (28.8K)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+
+ /* various DPCM codecs */
+ {
+ .id = AV_CODEC_ID_ROQ_DPCM,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "roq_dpcm",
+ .long_name = NULL_IF_CONFIG_SMALL("DPCM id RoQ"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_INTERPLAY_DPCM,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "interplay_dpcm",
+ .long_name = NULL_IF_CONFIG_SMALL("DPCM Interplay"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_XAN_DPCM,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "xan_dpcm",
+ .long_name = NULL_IF_CONFIG_SMALL("DPCM Xan"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_SOL_DPCM,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "sol_dpcm",
+ .long_name = NULL_IF_CONFIG_SMALL("DPCM Sol"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_SDX2_DPCM,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "sdx2_dpcm",
+ .long_name = NULL_IF_CONFIG_SMALL("DPCM Squareroot-Delta-Exact"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+
+ /* audio codecs */
+ {
+ .id = AV_CODEC_ID_MP2,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "mp2",
+ .long_name = NULL_IF_CONFIG_SMALL("MP2 (MPEG audio layer 2)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_MP3,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "mp3",
+ .long_name = NULL_IF_CONFIG_SMALL("MP3 (MPEG audio layer 3)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_AAC,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "aac",
+ .long_name = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ .profiles = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
+ },
+ {
+ .id = AV_CODEC_ID_AC3,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "ac3",
+ .long_name = NULL_IF_CONFIG_SMALL("ATSC A/52A (AC-3)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_DTS,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "dts",
+ .long_name = NULL_IF_CONFIG_SMALL("DCA (DTS Coherent Acoustics)"),
+ .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS,
+ .profiles = NULL_IF_CONFIG_SMALL(ff_dca_profiles),
+ },
+ {
+ .id = AV_CODEC_ID_VORBIS,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "vorbis",
+ .long_name = NULL_IF_CONFIG_SMALL("Vorbis"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_DVAUDIO,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "dvaudio",
+ .long_name = NULL_IF_CONFIG_SMALL("DV audio"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_WMAV1,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "wmav1",
+ .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio 1"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_WMAV2,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "wmav2",
+ .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio 2"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_MACE3,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "mace3",
+ .long_name = NULL_IF_CONFIG_SMALL("MACE (Macintosh Audio Compression/Expansion) 3:1"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_MACE6,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "mace6",
+ .long_name = NULL_IF_CONFIG_SMALL("MACE (Macintosh Audio Compression/Expansion) 6:1"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_VMDAUDIO,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "vmdaudio",
+ .long_name = NULL_IF_CONFIG_SMALL("Sierra VMD audio"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_FLAC,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "flac",
+ .long_name = NULL_IF_CONFIG_SMALL("FLAC (Free Lossless Audio Codec)"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_MP3ADU,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "mp3adu",
+ .long_name = NULL_IF_CONFIG_SMALL("ADU (Application Data Unit) MP3 (MPEG audio layer 3)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_MP3ON4,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "mp3on4",
+ .long_name = NULL_IF_CONFIG_SMALL("MP3onMP4"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_SHORTEN,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "shorten",
+ .long_name = NULL_IF_CONFIG_SMALL("Shorten"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_ALAC,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "alac",
+ .long_name = NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_WESTWOOD_SND1,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "westwood_snd1",
+ .long_name = NULL_IF_CONFIG_SMALL("Westwood Audio (SND1)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_GSM,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "gsm",
+ .long_name = NULL_IF_CONFIG_SMALL("GSM"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_QDM2,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "qdm2",
+ .long_name = NULL_IF_CONFIG_SMALL("QDesign Music Codec 2"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_COOK,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "cook",
+ .long_name = NULL_IF_CONFIG_SMALL("Cook / Cooker / Gecko (RealAudio G2)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_TRUESPEECH,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "truespeech",
+ .long_name = NULL_IF_CONFIG_SMALL("DSP Group TrueSpeech"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_TTA,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "tta",
+ .long_name = NULL_IF_CONFIG_SMALL("TTA (True Audio)"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_SMACKAUDIO,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "smackaudio",
+ .long_name = NULL_IF_CONFIG_SMALL("Smacker audio"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_QCELP,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "qcelp",
+ .long_name = NULL_IF_CONFIG_SMALL("QCELP / PureVoice"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_WAVPACK,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "wavpack",
+ .long_name = NULL_IF_CONFIG_SMALL("WavPack"),
+ .props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_DSICINAUDIO,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "dsicinaudio",
+ .long_name = NULL_IF_CONFIG_SMALL("Delphine Software International CIN audio"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_IMC,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "imc",
+ .long_name = NULL_IF_CONFIG_SMALL("IMC (Intel Music Coder)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_MUSEPACK7,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "musepack7",
+ .long_name = NULL_IF_CONFIG_SMALL("Musepack SV7"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_MLP,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "mlp",
+ .long_name = NULL_IF_CONFIG_SMALL("MLP (Meridian Lossless Packing)"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_GSM_MS,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "gsm_ms",
+ .long_name = NULL_IF_CONFIG_SMALL("GSM Microsoft variant"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ATRAC3,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "atrac3",
+ .long_name = NULL_IF_CONFIG_SMALL("ATRAC3 (Adaptive TRansform Acoustic Coding 3)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+#if FF_API_VOXWARE
+ {
+ .id = AV_CODEC_ID_VOXWARE,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "voxware",
+ .long_name = NULL_IF_CONFIG_SMALL("Voxware RT29 Metasound"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+#endif
+ {
+ .id = AV_CODEC_ID_APE,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "ape",
+ .long_name = NULL_IF_CONFIG_SMALL("Monkey's Audio"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_NELLYMOSER,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "nellymoser",
+ .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_MUSEPACK8,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "musepack8",
+ .long_name = NULL_IF_CONFIG_SMALL("Musepack SV8"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_SPEEX,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "speex",
+ .long_name = NULL_IF_CONFIG_SMALL("Speex"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_WMAVOICE,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "wmavoice",
+ .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_WMAPRO,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "wmapro",
+ .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio 9 Professional"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_WMALOSSLESS,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "wmalossless",
+ .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Lossless"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_ATRAC3P,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "atrac3p",
+ .long_name = NULL_IF_CONFIG_SMALL("ATRAC3+ (Adaptive TRansform Acoustic Coding 3+)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_EAC3,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "eac3",
+ .long_name = NULL_IF_CONFIG_SMALL("ATSC A/52B (AC-3, E-AC-3)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_SIPR,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "sipr",
+ .long_name = NULL_IF_CONFIG_SMALL("RealAudio SIPR / ACELP.NET"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_MP1,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "mp1",
+ .long_name = NULL_IF_CONFIG_SMALL("MP1 (MPEG audio layer 1)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_TWINVQ,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "twinvq",
+ .long_name = NULL_IF_CONFIG_SMALL("VQF TwinVQ"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_TRUEHD,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "truehd",
+ .long_name = NULL_IF_CONFIG_SMALL("TrueHD"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_MP4ALS,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "mp4als",
+ .long_name = NULL_IF_CONFIG_SMALL("MPEG-4 Audio Lossless Coding (ALS)"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_ATRAC1,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "atrac1",
+ .long_name = NULL_IF_CONFIG_SMALL("ATRAC1 (Adaptive TRansform Acoustic Coding)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_BINKAUDIO_RDFT,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "binkaudio_rdft",
+ .long_name = NULL_IF_CONFIG_SMALL("Bink Audio (RDFT)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_BINKAUDIO_DCT,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "binkaudio_dct",
+ .long_name = NULL_IF_CONFIG_SMALL("Bink Audio (DCT)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_AAC_LATM,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "aac_latm",
+ .long_name = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Coding LATM syntax)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ .profiles = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
+ },
+ {
+ .id = AV_CODEC_ID_QDMC,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "qdmc",
+ .long_name = NULL_IF_CONFIG_SMALL("QDesign Music"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_CELT,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "celt",
+ .long_name = NULL_IF_CONFIG_SMALL("Constrained Energy Lapped Transform (CELT)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_G723_1,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "g723_1",
+ .long_name = NULL_IF_CONFIG_SMALL("G.723.1"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_DSS_SP,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "dss_sp",
+ .long_name = NULL_IF_CONFIG_SMALL("Digital Speech Standard - Standard Play mode (DSS SP)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_G729,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "g729",
+ .long_name = NULL_IF_CONFIG_SMALL("G.729"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_8SVX_EXP,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "8svx_exp",
+ .long_name = NULL_IF_CONFIG_SMALL("8SVX exponential"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_8SVX_FIB,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "8svx_fib",
+ .long_name = NULL_IF_CONFIG_SMALL("8SVX fibonacci"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_BMV_AUDIO,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "bmv_audio",
+ .long_name = NULL_IF_CONFIG_SMALL("Discworld II BMV audio"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_RALF,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "ralf",
+ .long_name = NULL_IF_CONFIG_SMALL("RealAudio Lossless"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_IAC,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "iac",
+ .long_name = NULL_IF_CONFIG_SMALL("IAC (Indeo Audio Coder)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ILBC,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "ilbc",
+ .long_name = NULL_IF_CONFIG_SMALL("iLBC (Internet Low Bitrate Codec)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_FFWAVESYNTH,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "wavesynth",
+ .long_name = NULL_IF_CONFIG_SMALL("Wave synthesis pseudo-codec"),
+ },
+ {
+ .id = AV_CODEC_ID_SONIC,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "sonic",
+ .long_name = NULL_IF_CONFIG_SMALL("Sonic"),
+ },
+ {
+ .id = AV_CODEC_ID_SONIC_LS,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "sonicls",
+ .long_name = NULL_IF_CONFIG_SMALL("Sonic lossless"),
+ },
+ {
+ .id = AV_CODEC_ID_OPUS,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "opus",
+ .long_name = NULL_IF_CONFIG_SMALL("Opus (Opus Interactive Audio Codec)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_COMFORT_NOISE,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "comfortnoise",
+ .long_name = NULL_IF_CONFIG_SMALL("RFC 3389 Comfort Noise"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_TAK,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "tak",
+ .long_name = NULL_IF_CONFIG_SMALL("TAK (Tom's lossless Audio Kompressor)"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_METASOUND,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "metasound",
+ .long_name = NULL_IF_CONFIG_SMALL("Voxware MetaSound"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_PAF_AUDIO,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "paf_audio",
+ .long_name = NULL_IF_CONFIG_SMALL("Amazing Studio Packed Animation File Audio"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_ON2AVC,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "avc",
+ .long_name = NULL_IF_CONFIG_SMALL("On2 Audio for Video Codec"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_EVRC,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "evrc",
+ .long_name = NULL_IF_CONFIG_SMALL("EVRC (Enhanced Variable Rate Codec)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_SMV,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "smv",
+ .long_name = NULL_IF_CONFIG_SMALL("SMV (Selectable Mode Vocoder)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_4GV,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "4gv",
+ .long_name = NULL_IF_CONFIG_SMALL("4GV (Fourth Generation Vocoder)"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_DSD_LSBF,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "dsd_lsbf",
+ .long_name = NULL_IF_CONFIG_SMALL("DSD (Direct Stream Digital), least significant bit first"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_DSD_MSBF,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "dsd_msbf",
+ .long_name = NULL_IF_CONFIG_SMALL("DSD (Direct Stream Digital), most significant bit first"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_DSD_LSBF_PLANAR,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "dsd_lsbf_planar",
+ .long_name = NULL_IF_CONFIG_SMALL("DSD (Direct Stream Digital), least significant bit first, planar"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_DSD_MSBF_PLANAR,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "dsd_msbf_planar",
+ .long_name = NULL_IF_CONFIG_SMALL("DSD (Direct Stream Digital), most significant bit first, planar"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_INTERPLAY_ACM,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "interplayacm",
+ .long_name = NULL_IF_CONFIG_SMALL("Interplay ACM"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_XMA1,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "xma1",
+ .long_name = NULL_IF_CONFIG_SMALL("Xbox Media Audio 1"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_XMA2,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "xma2",
+ .long_name = NULL_IF_CONFIG_SMALL("Xbox Media Audio 2"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+ {
+ .id = AV_CODEC_ID_DST,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "dst",
+ .long_name = NULL_IF_CONFIG_SMALL("DST (Direct Stream Transfer)"),
+ .props = AV_CODEC_PROP_LOSSLESS,
+ },
+ {
+ .id = AV_CODEC_ID_ADPCM_MTAF,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .name = "adpcm_mtaf",
+ .long_name = NULL_IF_CONFIG_SMALL("ADPCM MTAF"),
+ .props = AV_CODEC_PROP_LOSSY,
+ },
+
+ /* subtitle codecs */
+ {
+ .id = AV_CODEC_ID_DVD_SUBTITLE,
+ .type = AVMEDIA_TYPE_SUBTITLE,
+ .name = "dvd_subtitle",
+ .long_name = NULL_IF_CONFIG_SMALL("DVD subtitles"),
+ .props = AV_CODEC_PROP_BITMAP_SUB,
+ },
+ {
+ .id = AV_CODEC_ID_DVB_SUBTITLE,
+ .type = AVMEDIA_TYPE_SUBTITLE,
+ .name = "dvb_subtitle",
+ .long_name = NULL_IF_CONFIG_SMALL("DVB subtitles"),
+ .props = AV_CODEC_PROP_BITMAP_SUB,
+ },
+ {
+ .id = AV_CODEC_ID_TEXT,
+ .type = AVMEDIA_TYPE_SUBTITLE,
+ .name = "text",
+ .long_name = NULL_IF_CONFIG_SMALL("raw UTF-8 text"),
+ .props = AV_CODEC_PROP_TEXT_SUB,
+ },
+ {
+ .id = AV_CODEC_ID_XSUB,
+ .type = AVMEDIA_TYPE_SUBTITLE,
+ .name = "xsub",
+ .long_name = NULL_IF_CONFIG_SMALL("XSUB"),
+ .props = AV_CODEC_PROP_BITMAP_SUB,
+ },
+ {
+ .id = AV_CODEC_ID_ASS,
+ .type = AVMEDIA_TYPE_SUBTITLE,
+ .name = "ass",
+ .long_name = NULL_IF_CONFIG_SMALL("ASS (Advanced SSA) subtitle"),
+ .props = AV_CODEC_PROP_TEXT_SUB,
+ },
+ {
+ .id = AV_CODEC_ID_SSA,
+ .type = AVMEDIA_TYPE_SUBTITLE,
+ .name = "ssa",
+ .long_name = NULL_IF_CONFIG_SMALL("SSA (SubStation Alpha) subtitle"),
+ .props = AV_CODEC_PROP_TEXT_SUB,
+ },
+ {
+ .id = AV_CODEC_ID_MOV_TEXT,
+ .type = AVMEDIA_TYPE_SUBTITLE,
+ .name = "mov_text",
+ .long_name = NULL_IF_CONFIG_SMALL("MOV text"),
+ .props = AV_CODEC_PROP_TEXT_SUB,
+ },
+ {
+ .id = AV_CODEC_ID_HDMV_PGS_SUBTITLE,
+ .type = AVMEDIA_TYPE_SUBTITLE,
+ .name = "hdmv_pgs_subtitle",
+ .long_name = NULL_IF_CONFIG_SMALL("HDMV Presentation Graphic Stream subtitles"),
+ .props = AV_CODEC_PROP_BITMAP_SUB,
+ },
+ {
+ .id = AV_CODEC_ID_DVB_TELETEXT,
+ .type = AVMEDIA_TYPE_SUBTITLE,
+ .name = "dvb_teletext",
+ .long_name = NULL_IF_CONFIG_SMALL("DVB teletext"),
+ },
+ {
+ .id = AV_CODEC_ID_SRT,
+ .type = AVMEDIA_TYPE_SUBTITLE,
+ .name = "srt",
+ .long_name = NULL_IF_CONFIG_SMALL("SubRip subtitle with embedded timing"),
+ .props = AV_CODEC_PROP_TEXT_SUB,
+ },
+ {
+ .id = AV_CODEC_ID_SUBRIP,
+ .type = AVMEDIA_TYPE_SUBTITLE,
+ .name = "subrip",
+ .long_name = NULL_IF_CONFIG_SMALL("SubRip subtitle"),
+ .props = AV_CODEC_PROP_TEXT_SUB,
+ },
+ {
+ .id = AV_CODEC_ID_MICRODVD,
+ .type = AVMEDIA_TYPE_SUBTITLE,
+ .name = "microdvd",
+ .long_name = NULL_IF_CONFIG_SMALL("MicroDVD subtitle"),
+ .props = AV_CODEC_PROP_TEXT_SUB,
+ },
+ {
+ .id = AV_CODEC_ID_MPL2,
+ .type = AVMEDIA_TYPE_SUBTITLE,
+ .name = "mpl2",
+ .long_name = NULL_IF_CONFIG_SMALL("MPL2 subtitle"),
+ .props = AV_CODEC_PROP_TEXT_SUB,
+ },
+ {
+ .id = AV_CODEC_ID_EIA_608,
+ .type = AVMEDIA_TYPE_SUBTITLE,
+ .name = "eia_608",
+ .long_name = NULL_IF_CONFIG_SMALL("EIA-608 closed captions"),
+ .props = AV_CODEC_PROP_TEXT_SUB,
+ },
+ {
+ .id = AV_CODEC_ID_JACOSUB,
+ .type = AVMEDIA_TYPE_SUBTITLE,
+ .name = "jacosub",
+ .long_name = NULL_IF_CONFIG_SMALL("JACOsub subtitle"),
+ .props = AV_CODEC_PROP_TEXT_SUB,
+ },
+ {
+ .id = AV_CODEC_ID_PJS,
+ .type = AVMEDIA_TYPE_SUBTITLE,
+ .name = "pjs",
+ .long_name = NULL_IF_CONFIG_SMALL("PJS (Phoenix Japanimation Society) subtitle"),
+ .props = AV_CODEC_PROP_TEXT_SUB,
+ },
+ {
+ .id = AV_CODEC_ID_SAMI,
+ .type = AVMEDIA_TYPE_SUBTITLE,
+ .name = "sami",
+ .long_name = NULL_IF_CONFIG_SMALL("SAMI subtitle"),
+ .props = AV_CODEC_PROP_TEXT_SUB,
+ },
+ {
+ .id = AV_CODEC_ID_REALTEXT,
+ .type = AVMEDIA_TYPE_SUBTITLE,
+ .name = "realtext",
+ .long_name = NULL_IF_CONFIG_SMALL("RealText subtitle"),
+ .props = AV_CODEC_PROP_TEXT_SUB,
+ },
+ {
+ .id = AV_CODEC_ID_STL,
+ .type = AVMEDIA_TYPE_SUBTITLE,
+ .name = "stl",
+ .long_name = NULL_IF_CONFIG_SMALL("Spruce subtitle format"),
+ .props = AV_CODEC_PROP_TEXT_SUB,
+ },
+ {
+ .id = AV_CODEC_ID_SUBVIEWER1,
+ .type = AVMEDIA_TYPE_SUBTITLE,
+ .name = "subviewer1",
+ .long_name = NULL_IF_CONFIG_SMALL("SubViewer v1 subtitle"),
+ .props = AV_CODEC_PROP_TEXT_SUB,
+ },
+ {
+ .id = AV_CODEC_ID_SUBVIEWER,
+ .type = AVMEDIA_TYPE_SUBTITLE,
+ .name = "subviewer",
+ .long_name = NULL_IF_CONFIG_SMALL("SubViewer subtitle"),
+ .props = AV_CODEC_PROP_TEXT_SUB,
+ },
+ {
+ .id = AV_CODEC_ID_VPLAYER,
+ .type = AVMEDIA_TYPE_SUBTITLE,
+ .name = "vplayer",
+ .long_name = NULL_IF_CONFIG_SMALL("VPlayer subtitle"),
+ .props = AV_CODEC_PROP_TEXT_SUB,
+ },
+ {
+ .id = AV_CODEC_ID_WEBVTT,
+ .type = AVMEDIA_TYPE_SUBTITLE,
+ .name = "webvtt",
+ .long_name = NULL_IF_CONFIG_SMALL("WebVTT subtitle"),
+ .props = AV_CODEC_PROP_TEXT_SUB,
+ },
+ {
+ .id = AV_CODEC_ID_HDMV_TEXT_SUBTITLE,
+ .type = AVMEDIA_TYPE_SUBTITLE,
+ .name = "hdmv_text_subtitle",
+ .long_name = NULL_IF_CONFIG_SMALL("HDMV Text subtitle"),
+ .props = AV_CODEC_PROP_TEXT_SUB,
+ },
+
+ /* other kind of codecs and pseudo-codecs */
+ {
+ .id = AV_CODEC_ID_TTF,
+ .type = AVMEDIA_TYPE_DATA,
+ .name = "ttf",
+ .long_name = NULL_IF_CONFIG_SMALL("TrueType font"),
+ .mime_types= MT("application/x-truetype-font", "application/x-font"),
+ },
+ {
+ .id = AV_CODEC_ID_BINTEXT,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "bintext",
+ .long_name = NULL_IF_CONFIG_SMALL("Binary text"),
+ .props = AV_CODEC_PROP_INTRA_ONLY,
+ },
+ {
+ .id = AV_CODEC_ID_XBIN,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "xbin",
+ .long_name = NULL_IF_CONFIG_SMALL("eXtended BINary text"),
+ .props = AV_CODEC_PROP_INTRA_ONLY,
+ },
+ {
+ .id = AV_CODEC_ID_IDF,
+ .type = AVMEDIA_TYPE_VIDEO,
+ .name = "idf",
+ .long_name = NULL_IF_CONFIG_SMALL("iCEDraw text"),
+ .props = AV_CODEC_PROP_INTRA_ONLY,
+ },
+ {
+ .id = AV_CODEC_ID_OTF,
+ .type = AVMEDIA_TYPE_DATA,
+ .name = "otf",
+ .long_name = NULL_IF_CONFIG_SMALL("OpenType font"),
+ .mime_types= MT("application/vnd.ms-opentype"),
+ },
+ {
+ .id = AV_CODEC_ID_SMPTE_KLV,
+ .type = AVMEDIA_TYPE_DATA,
+ .name = "klv",
+ .long_name = NULL_IF_CONFIG_SMALL("SMPTE 336M Key-Length-Value (KLV) metadata"),
+ },
+ {
+ .id = AV_CODEC_ID_DVD_NAV,
+ .type = AVMEDIA_TYPE_DATA,
+ .name = "dvd_nav_packet",
+ .long_name = NULL_IF_CONFIG_SMALL("DVD Nav packet"),
+ },
+ {
+ .id = AV_CODEC_ID_TIMED_ID3,
+ .type = AVMEDIA_TYPE_DATA,
+ .name = "timed_id3",
+ .long_name = NULL_IF_CONFIG_SMALL("timed ID3 metadata"),
+ },
+ {
+ .id = AV_CODEC_ID_BIN_DATA,
+ .type = AVMEDIA_TYPE_DATA,
+ .name = "bin_data",
+ .long_name = NULL_IF_CONFIG_SMALL("binary data"),
+ .mime_types= MT("application/octet-stream"),
+ },
+
+ /* deprecated codec ids */
+};
+
+/**
+ * Look up the codec descriptor for a given codec ID.
+ *
+ * Performs a linear scan of the static codec_descriptors[] table.
+ *
+ * @param id codec ID to search for
+ * @return pointer to the matching (statically allocated) descriptor,
+ *         or NULL if no descriptor exists for this ID
+ */
+const AVCodecDescriptor *avcodec_descriptor_get(enum AVCodecID id)
+{
+    int i;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(codec_descriptors); i++)
+        if (codec_descriptors[i].id == id)
+            return &codec_descriptors[i];
+    return NULL;
+}
+
+/**
+ * Iterate over all registered codec descriptors.
+ *
+ * @param prev previous descriptor returned by this function,
+ *             or NULL to get the first descriptor
+ * @return the next descriptor in codec_descriptors[], or NULL once
+ *         the end of the table has been reached
+ *
+ * @note prev must point into codec_descriptors[]; passing any other
+ *       pointer yields undefined pointer arithmetic.
+ */
+const AVCodecDescriptor *avcodec_descriptor_next(const AVCodecDescriptor *prev)
+{
+    if (!prev)
+        return &codec_descriptors[0];
+    /* Return the following entry while prev is not the last element. */
+    if (prev - codec_descriptors < FF_ARRAY_ELEMS(codec_descriptors) - 1)
+        return prev + 1;
+    return NULL;
+}
+
+/**
+ * Look up a codec descriptor by its short name.
+ *
+ * Walks the descriptor table via avcodec_descriptor_next() and compares
+ * names with strcmp(), so the match is exact and case-sensitive.
+ *
+ * @param name short codec name (e.g. "opus"); must be non-NULL
+ * @return pointer to the matching descriptor, or NULL if none matches
+ */
+const AVCodecDescriptor *avcodec_descriptor_get_by_name(const char *name)
+{
+    const AVCodecDescriptor *desc = NULL;
+
+    while ((desc = avcodec_descriptor_next(desc)))
+        if (!strcmp(desc->name, name))
+            return desc;
+    return NULL;
+}
+
+/**
+ * Get the media type (audio/video/subtitle/...) associated with a codec ID.
+ *
+ * @param codec_id codec ID to query
+ * @return the type field of the codec's descriptor, or
+ *         AVMEDIA_TYPE_UNKNOWN when no descriptor exists for codec_id
+ */
+enum AVMediaType avcodec_get_type(enum AVCodecID codec_id)
+{
+    const AVCodecDescriptor *desc = avcodec_descriptor_get(codec_id);
+    return desc ? desc->type : AVMEDIA_TYPE_UNKNOWN;
+}
diff --git a/media/ffvpx/libavcodec/dct.h b/media/ffvpx/libavcodec/dct.h
new file mode 100644
index 000000000..05297ba9d
--- /dev/null
+++ b/media/ffvpx/libavcodec/dct.h
@@ -0,0 +1,68 @@
+/*
+ * (I)DCT Transforms
+ * Copyright (c) 2009 Peter Ross <pross@xvid.org>
+ * Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
+ * Copyright (c) 2010 Vitor Sessak
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#if !defined(AVCODEC_DCT_H) && (!defined(FFT_FLOAT) || FFT_FLOAT)
+#define AVCODEC_DCT_H
+
+#include <stdint.h>
+
+#include "rdft.h"
+
+/* State for a (inverse) DCT transform; initialized by ff_dct_init(). */
+struct DCTContext {
+    int nbits;                   /* log2 of the transform size (see ff_dct_init) */
+    int inverse;                 /* nonzero for the inverse transform */
+    RDFTContext rdft;            /* embedded RDFT used to realize the DCT */
+    const float *costab;         /* cosine table -- presumably filled by ff_dct_init; TODO confirm */
+    FFTSample *csc2;             /* auxiliary coefficient table -- semantics set up in ff_dct_init */
+    void (*dct_calc)(struct DCTContext *s, FFTSample *data);  /* in-place transform entry point */
+    void (*dct32)(FFTSample *out, const FFTSample *in);       /* 32-point DCT specialization */
+};
+
+/**
+ * Set up DCT.
+ * @param nbits size of the input array:
+ * (1 << nbits) for DCT-II, DCT-III and DST-I
+ * (1 << nbits) + 1 for DCT-I
+ *
+ * @note the first element of the input of DST-I is ignored
+ */
+int ff_dct_init(DCTContext *s, int nbits, enum DCTTransformType type);
+void ff_dct_end (DCTContext *s);
+
+void ff_dct_init_x86(DCTContext *s);
+
+void ff_fdct_ifast(int16_t *data);
+void ff_fdct_ifast248(int16_t *data);
+void ff_jpeg_fdct_islow_8(int16_t *data);
+void ff_jpeg_fdct_islow_10(int16_t *data);
+void ff_fdct248_islow_8(int16_t *data);
+void ff_fdct248_islow_10(int16_t *data);
+
+void ff_j_rev_dct(int16_t *data);
+void ff_j_rev_dct4(int16_t *data);
+void ff_j_rev_dct2(int16_t *data);
+void ff_j_rev_dct1(int16_t *data);
+void ff_jref_idct_put(uint8_t *dest, int line_size, int16_t *block);
+void ff_jref_idct_add(uint8_t *dest, int line_size, int16_t *block);
+
+#endif /* AVCODEC_DCT_H */
diff --git a/media/ffvpx/libavcodec/dummy_funcs.c b/media/ffvpx/libavcodec/dummy_funcs.c
new file mode 100644
index 000000000..a065c3576
--- /dev/null
+++ b/media/ffvpx/libavcodec/dummy_funcs.c
@@ -0,0 +1,801 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "avcodec.h"
+
+typedef struct H264PredContext H264PredContext;
+typedef struct VideoDSPContext VideoDSPContext;
+typedef struct VP8DSPContext VP8DSPContext;
+typedef struct VP9DSPContext VP9DSPContext;
+typedef struct FLACDSPContext FLACDSPContext;
+
+AVHWAccel ff_h263_vaapi_hwaccel;
+AVHWAccel ff_h263_vdpau_hwaccel;
+AVHWAccel ff_h263_videotoolbox_hwaccel;
+AVHWAccel ff_h264_d3d11va_hwaccel;
+AVHWAccel ff_h264_dxva2_hwaccel;
+AVHWAccel ff_h264_mmal_hwaccel;
+AVHWAccel ff_h264_qsv_hwaccel;
+AVHWAccel ff_h264_vaapi_hwaccel;
+AVHWAccel ff_h264_vda_hwaccel;
+AVHWAccel ff_h264_vda_old_hwaccel;
+AVHWAccel ff_h264_vdpau_hwaccel;
+AVHWAccel ff_h264_videotoolbox_hwaccel;
+AVHWAccel ff_hevc_d3d11va_hwaccel;
+AVHWAccel ff_hevc_dxva2_hwaccel;
+AVHWAccel ff_hevc_qsv_hwaccel;
+AVHWAccel ff_hevc_vaapi_hwaccel;
+AVHWAccel ff_hevc_vdpau_hwaccel;
+AVHWAccel ff_mpeg1_xvmc_hwaccel;
+AVHWAccel ff_mpeg1_vdpau_hwaccel;
+AVHWAccel ff_mpeg1_videotoolbox_hwaccel;
+AVHWAccel ff_mpeg2_xvmc_hwaccel;
+AVHWAccel ff_mpeg2_d3d11va_hwaccel;
+AVHWAccel ff_mpeg2_dxva2_hwaccel;
+AVHWAccel ff_mpeg2_qsv_hwaccel;
+AVHWAccel ff_mpeg2_vaapi_hwaccel;
+AVHWAccel ff_mpeg2_vdpau_hwaccel;
+AVHWAccel ff_mpeg2_videotoolbox_hwaccel;
+AVHWAccel ff_mpeg4_mmal_hwaccel;
+AVHWAccel ff_mpeg4_vaapi_hwaccel;
+AVHWAccel ff_mpeg4_vdpau_hwaccel;
+AVHWAccel ff_mpeg4_videotoolbox_hwaccel;
+AVHWAccel ff_vc1_d3d11va_hwaccel;
+AVHWAccel ff_vc1_dxva2_hwaccel;
+AVHWAccel ff_vc1_vaapi_hwaccel;
+AVHWAccel ff_vc1_vdpau_hwaccel;
+AVHWAccel ff_vc1_qsv_hwaccel;
+AVHWAccel ff_wmv3_d3d11va_hwaccel;
+AVHWAccel ff_wmv3_dxva2_hwaccel;
+AVHWAccel ff_wmv3_vaapi_hwaccel;
+AVHWAccel ff_wmv3_vdpau_hwaccel;
+AVHWAccel ff_mpeg2_mmal_hwaccel;
+AVHWAccel ff_vc1_mmal_hwaccel;
+AVHWAccel ff_vp9_d3d11va_hwaccel;
+AVHWAccel ff_vp9_dxva2_hwaccel;
+AVHWAccel ff_vp9_vaapi_hwaccel;
+AVHWAccel ff_vp9_cuvid_hwaccel;
+AVHWAccel ff_vp8_cuvid_hwaccel;
+AVHWAccel ff_vc1_cuvid_hwaccel;
+AVHWAccel ff_hevc_cuvid_hwaccel;
+AVHWAccel ff_h264_cuvid_hwaccel;
+
+AVCodec ff_a64multi_encoder;
+AVCodec ff_a64multi5_encoder;
+AVCodec ff_aasc_decoder;
+AVCodec ff_aic_decoder;
+AVCodec ff_alias_pix_encoder;
+AVCodec ff_alias_pix_decoder;
+AVCodec ff_amv_encoder;
+AVCodec ff_amv_decoder;
+AVCodec ff_anm_decoder;
+AVCodec ff_ansi_decoder;
+AVCodec ff_apng_encoder;
+AVCodec ff_apng_decoder;
+AVCodec ff_asv1_encoder;
+AVCodec ff_asv1_decoder;
+AVCodec ff_asv2_encoder;
+AVCodec ff_asv2_decoder;
+AVCodec ff_aura_decoder;
+AVCodec ff_aura2_decoder;
+AVCodec ff_avrp_encoder;
+AVCodec ff_avrp_decoder;
+AVCodec ff_avrn_decoder;
+AVCodec ff_avs_decoder;
+AVCodec ff_avui_encoder;
+AVCodec ff_avui_decoder;
+AVCodec ff_ayuv_encoder;
+AVCodec ff_ayuv_decoder;
+AVCodec ff_bethsoftvid_decoder;
+AVCodec ff_bfi_decoder;
+AVCodec ff_bink_decoder;
+AVCodec ff_bmp_encoder;
+AVCodec ff_bmp_decoder;
+AVCodec ff_bmv_video_decoder;
+AVCodec ff_brender_pix_decoder;
+AVCodec ff_c93_decoder;
+AVCodec ff_cavs_decoder;
+AVCodec ff_cdgraphics_decoder;
+AVCodec ff_cdxl_decoder;
+AVCodec ff_cfhd_decoder;
+AVCodec ff_cinepak_encoder;
+AVCodec ff_cinepak_decoder;
+AVCodec ff_cljr_encoder;
+AVCodec ff_cljr_decoder;
+AVCodec ff_cllc_decoder;
+AVCodec ff_comfortnoise_encoder;
+AVCodec ff_comfortnoise_decoder;
+AVCodec ff_cpia_decoder;
+AVCodec ff_cscd_decoder;
+AVCodec ff_cyuv_decoder;
+AVCodec ff_dds_decoder;
+AVCodec ff_dfa_decoder;
+AVCodec ff_dirac_decoder;
+AVCodec ff_dnxhd_encoder;
+AVCodec ff_dnxhd_decoder;
+AVCodec ff_dpx_encoder;
+AVCodec ff_dpx_decoder;
+AVCodec ff_dsicinvideo_decoder;
+AVCodec ff_dvaudio_decoder;
+AVCodec ff_dvvideo_encoder;
+AVCodec ff_dvvideo_decoder;
+AVCodec ff_dxa_decoder;
+AVCodec ff_dxtory_decoder;
+AVCodec ff_eacmv_decoder;
+AVCodec ff_eamad_decoder;
+AVCodec ff_eatgq_decoder;
+AVCodec ff_eatgv_decoder;
+AVCodec ff_eatqi_decoder;
+AVCodec ff_eightbps_decoder;
+AVCodec ff_eightsvx_exp_decoder;
+AVCodec ff_eightsvx_fib_decoder;
+AVCodec ff_escape124_decoder;
+AVCodec ff_escape130_decoder;
+AVCodec ff_exr_decoder;
+AVCodec ff_ffv1_encoder;
+AVCodec ff_ffv1_decoder;
+AVCodec ff_ffvhuff_encoder;
+AVCodec ff_ffvhuff_decoder;
+AVCodec ff_fic_decoder;
+AVCodec ff_flashsv_encoder;
+AVCodec ff_flashsv_decoder;
+AVCodec ff_flashsv2_encoder;
+AVCodec ff_flashsv2_decoder;
+AVCodec ff_flic_decoder;
+AVCodec ff_flv_encoder;
+AVCodec ff_flv_decoder;
+AVCodec ff_fourxm_decoder;
+AVCodec ff_fraps_decoder;
+AVCodec ff_frwu_decoder;
+AVCodec ff_g2m_decoder;
+AVCodec ff_gif_encoder;
+AVCodec ff_gif_decoder;
+AVCodec ff_h261_encoder;
+AVCodec ff_h261_decoder;
+AVCodec ff_h263_encoder;
+AVCodec ff_h263_decoder;
+AVCodec ff_h263i_decoder;
+AVCodec ff_h263p_encoder;
+AVCodec ff_h263p_decoder;
+AVCodec ff_h264_decoder;
+AVCodec ff_h264_crystalhd_decoder;
+AVCodec ff_h264_mmal_decoder;
+AVCodec ff_h264_qsv_decoder;
+AVCodec ff_h264_vda_decoder;
+AVCodec ff_h264_vdpau_decoder;
+AVCodec ff_hap_encoder;
+AVCodec ff_hap_decoder;
+AVCodec ff_hevc_decoder;
+AVCodec ff_hevc_qsv_decoder;
+AVCodec ff_hnm4_video_decoder;
+AVCodec ff_hq_hqa_decoder;
+AVCodec ff_hqx_decoder;
+AVCodec ff_huffyuv_encoder;
+AVCodec ff_huffyuv_decoder;
+AVCodec ff_idcin_decoder;
+AVCodec ff_iff_byterun1_decoder;
+AVCodec ff_iff_ilbm_decoder;
+AVCodec ff_indeo2_decoder;
+AVCodec ff_indeo3_decoder;
+AVCodec ff_indeo4_decoder;
+AVCodec ff_indeo5_decoder;
+AVCodec ff_interplay_video_decoder;
+AVCodec ff_jpeg2000_encoder;
+AVCodec ff_jpeg2000_decoder;
+AVCodec ff_jpegls_encoder;
+AVCodec ff_jpegls_decoder;
+AVCodec ff_jv_decoder;
+AVCodec ff_kgv1_decoder;
+AVCodec ff_kmvc_decoder;
+AVCodec ff_lagarith_decoder;
+AVCodec ff_ljpeg_encoder;
+AVCodec ff_loco_decoder;
+AVCodec ff_mdec_decoder;
+AVCodec ff_mimic_decoder;
+AVCodec ff_mjpeg_encoder;
+AVCodec ff_mjpeg_decoder;
+AVCodec ff_mjpegb_decoder;
+AVCodec ff_mmvideo_decoder;
+AVCodec ff_motionpixels_decoder;
+AVCodec ff_mpeg_xvmc_decoder;
+AVCodec ff_mpeg1video_encoder;
+AVCodec ff_mpeg1video_decoder;
+AVCodec ff_mpeg2video_encoder;
+AVCodec ff_mpeg2video_decoder;
+AVCodec ff_mpeg4_encoder;
+AVCodec ff_mpeg4_decoder;
+AVCodec ff_mpeg4_crystalhd_decoder;
+AVCodec ff_mpeg4_mmal_decoder;
+AVCodec ff_mpeg4_vdpau_decoder;
+AVCodec ff_mpegvideo_decoder;
+AVCodec ff_mpeg_vdpau_decoder;
+AVCodec ff_mpeg1_vdpau_decoder;
+AVCodec ff_mpeg2_crystalhd_decoder;
+AVCodec ff_mpeg2_qsv_decoder;
+AVCodec ff_msa1_decoder;
+AVCodec ff_msmpeg4_crystalhd_decoder;
+AVCodec ff_msmpeg4v1_decoder;
+AVCodec ff_msmpeg4v2_encoder;
+AVCodec ff_msmpeg4v2_decoder;
+AVCodec ff_msmpeg4v3_encoder;
+AVCodec ff_msmpeg4v3_decoder;
+AVCodec ff_msrle_decoder;
+AVCodec ff_mss1_decoder;
+AVCodec ff_mss2_decoder;
+AVCodec ff_msvideo1_encoder;
+AVCodec ff_msvideo1_decoder;
+AVCodec ff_mszh_decoder;
+AVCodec ff_mts2_decoder;
+AVCodec ff_mvc1_decoder;
+AVCodec ff_mvc2_decoder;
+AVCodec ff_mxpeg_decoder;
+AVCodec ff_nuv_decoder;
+AVCodec ff_paf_video_decoder;
+AVCodec ff_pam_encoder;
+AVCodec ff_pam_decoder;
+AVCodec ff_pbm_encoder;
+AVCodec ff_pbm_decoder;
+AVCodec ff_pcx_encoder;
+AVCodec ff_pcx_decoder;
+AVCodec ff_pgm_encoder;
+AVCodec ff_pgm_decoder;
+AVCodec ff_pgmyuv_encoder;
+AVCodec ff_pgmyuv_decoder;
+AVCodec ff_pictor_decoder;
+AVCodec ff_png_encoder;
+AVCodec ff_png_decoder;
+AVCodec ff_ppm_encoder;
+AVCodec ff_ppm_decoder;
+AVCodec ff_prores_encoder;
+AVCodec ff_prores_decoder;
+AVCodec ff_prores_aw_encoder;
+AVCodec ff_prores_ks_encoder;
+AVCodec ff_prores_lgpl_decoder;
+AVCodec ff_ptx_decoder;
+AVCodec ff_qdraw_decoder;
+AVCodec ff_qpeg_decoder;
+AVCodec ff_qtrle_encoder;
+AVCodec ff_qtrle_decoder;
+AVCodec ff_r10k_encoder;
+AVCodec ff_r10k_decoder;
+AVCodec ff_r210_encoder;
+AVCodec ff_r210_decoder;
+AVCodec ff_rawvideo_encoder;
+AVCodec ff_rawvideo_decoder;
+AVCodec ff_rl2_decoder;
+AVCodec ff_roq_encoder;
+AVCodec ff_roq_decoder;
+AVCodec ff_rpza_decoder;
+AVCodec ff_rv10_encoder;
+AVCodec ff_rv10_decoder;
+AVCodec ff_rv20_encoder;
+AVCodec ff_rv20_decoder;
+AVCodec ff_rv30_decoder;
+AVCodec ff_rv40_decoder;
+AVCodec ff_s302m_encoder;
+AVCodec ff_s302m_decoder;
+AVCodec ff_sanm_decoder;
+AVCodec ff_sgi_encoder;
+AVCodec ff_sgi_decoder;
+AVCodec ff_sgirle_decoder;
+AVCodec ff_smacker_decoder;
+AVCodec ff_smc_decoder;
+AVCodec ff_smvjpeg_decoder;
+AVCodec ff_snow_encoder;
+AVCodec ff_snow_decoder;
+AVCodec ff_sp5x_decoder;
+AVCodec ff_sunrast_encoder;
+AVCodec ff_sunrast_decoder;
+AVCodec ff_svq1_encoder;
+AVCodec ff_svq1_decoder;
+AVCodec ff_svq3_decoder;
+AVCodec ff_targa_encoder;
+AVCodec ff_targa_decoder;
+AVCodec ff_targa_y216_decoder;
+AVCodec ff_tdsc_decoder;
+AVCodec ff_theora_decoder;
+AVCodec ff_thp_decoder;
+AVCodec ff_tiertexseqvideo_decoder;
+AVCodec ff_tiff_encoder;
+AVCodec ff_tiff_decoder;
+AVCodec ff_tmv_decoder;
+AVCodec ff_truemotion1_decoder;
+AVCodec ff_truemotion2_decoder;
+AVCodec ff_tscc_decoder;
+AVCodec ff_tscc2_decoder;
+AVCodec ff_txd_decoder;
+AVCodec ff_ulti_decoder;
+AVCodec ff_utvideo_encoder;
+AVCodec ff_utvideo_decoder;
+AVCodec ff_v210_encoder;
+AVCodec ff_v210_decoder;
+AVCodec ff_v210x_decoder;
+AVCodec ff_v308_encoder;
+AVCodec ff_v308_decoder;
+AVCodec ff_v408_encoder;
+AVCodec ff_v408_decoder;
+AVCodec ff_v410_encoder;
+AVCodec ff_v410_decoder;
+AVCodec ff_vb_decoder;
+AVCodec ff_vble_decoder;
+AVCodec ff_vc1_decoder;
+AVCodec ff_vc1_crystalhd_decoder;
+AVCodec ff_vc1_vdpau_decoder;
+AVCodec ff_vc1image_decoder;
+AVCodec ff_vc1_qsv_decoder;
+AVCodec ff_vc2_encoder;
+AVCodec ff_vcr1_decoder;
+AVCodec ff_vmdvideo_decoder;
+AVCodec ff_vmnc_decoder;
+AVCodec ff_vp3_decoder;
+AVCodec ff_vp5_decoder;
+AVCodec ff_vp6_decoder;
+AVCodec ff_vp6a_decoder;
+AVCodec ff_vp6f_decoder;
+AVCodec ff_vp7_decoder;
+AVCodec ff_vqa_decoder;
+AVCodec ff_webp_decoder;
+AVCodec ff_wmv1_encoder;
+AVCodec ff_wmv1_decoder;
+AVCodec ff_wmv2_encoder;
+AVCodec ff_wmv2_decoder;
+AVCodec ff_wmv3_decoder;
+AVCodec ff_wmv3_crystalhd_decoder;
+AVCodec ff_wmv3_vdpau_decoder;
+AVCodec ff_wmv3image_decoder;
+AVCodec ff_wnv1_decoder;
+AVCodec ff_xan_wc3_decoder;
+AVCodec ff_xan_wc4_decoder;
+AVCodec ff_xbm_encoder;
+AVCodec ff_xbm_decoder;
+AVCodec ff_xface_encoder;
+AVCodec ff_xface_decoder;
+AVCodec ff_xl_decoder;
+AVCodec ff_xwd_encoder;
+AVCodec ff_xwd_decoder;
+AVCodec ff_y41p_encoder;
+AVCodec ff_y41p_decoder;
+AVCodec ff_yop_decoder;
+AVCodec ff_yuv4_encoder;
+AVCodec ff_yuv4_decoder;
+AVCodec ff_zero12v_decoder;
+AVCodec ff_zerocodec_decoder;
+AVCodec ff_zlib_encoder;
+AVCodec ff_zlib_decoder;
+AVCodec ff_zmbv_encoder;
+AVCodec ff_zmbv_decoder;
+AVCodec ff_aac_encoder;
+AVCodec ff_aac_decoder;
+AVCodec ff_aac_fixed_decoder;
+AVCodec ff_aac_latm_decoder;
+AVCodec ff_ac3_encoder;
+AVCodec ff_ac3_decoder;
+AVCodec ff_ac3_fixed_encoder;
+AVCodec ff_ac3_fixed_decoder;
+AVCodec ff_alac_encoder;
+AVCodec ff_alac_decoder;
+AVCodec ff_als_decoder;
+AVCodec ff_amrnb_decoder;
+AVCodec ff_amrwb_decoder;
+AVCodec ff_ape_decoder;
+AVCodec ff_atrac1_decoder;
+AVCodec ff_atrac3_decoder;
+AVCodec ff_atrac3p_decoder;
+AVCodec ff_binkaudio_dct_decoder;
+AVCodec ff_binkaudio_rdft_decoder;
+AVCodec ff_bmv_audio_decoder;
+AVCodec ff_cook_decoder;
+AVCodec ff_dca_encoder;
+AVCodec ff_dca_decoder;
+AVCodec ff_dsd_lsbf_decoder;
+AVCodec ff_dsd_msbf_decoder;
+AVCodec ff_dsd_lsbf_planar_decoder;
+AVCodec ff_dsd_msbf_planar_decoder;
+AVCodec ff_dsicinaudio_decoder;
+AVCodec ff_dss_sp_decoder;
+AVCodec ff_eac3_encoder;
+AVCodec ff_eac3_decoder;
+AVCodec ff_evrc_decoder;
+AVCodec ff_ffwavesynth_decoder;
+AVCodec ff_flac_encoder;
+AVCodec ff_g723_1_encoder;
+AVCodec ff_g723_1_decoder;
+AVCodec ff_g729_decoder;
+AVCodec ff_gsm_decoder;
+AVCodec ff_gsm_ms_decoder;
+AVCodec ff_iac_decoder;
+AVCodec ff_imc_decoder;
+AVCodec ff_mace3_decoder;
+AVCodec ff_mace6_decoder;
+AVCodec ff_metasound_decoder;
+AVCodec ff_mlp_decoder;
+AVCodec ff_mp1_decoder;
+AVCodec ff_mp1float_decoder;
+AVCodec ff_mp2_encoder;
+AVCodec ff_mp2_decoder;
+AVCodec ff_mp2float_decoder;
+AVCodec ff_mp2fixed_encoder;
+AVCodec ff_mp3_decoder;
+AVCodec ff_mp3float_decoder;
+AVCodec ff_mp3adu_decoder;
+AVCodec ff_mp3adufloat_decoder;
+AVCodec ff_mp3on4_decoder;
+AVCodec ff_mp3on4float_decoder;
+AVCodec ff_mpc7_decoder;
+AVCodec ff_mpc8_decoder;
+AVCodec ff_nellymoser_encoder;
+AVCodec ff_nellymoser_decoder;
+AVCodec ff_on2avc_decoder;
+AVCodec ff_opus_decoder;
+AVCodec ff_paf_audio_decoder;
+AVCodec ff_qcelp_decoder;
+AVCodec ff_qdm2_decoder;
+AVCodec ff_ra_144_encoder;
+AVCodec ff_ra_144_decoder;
+AVCodec ff_ra_288_decoder;
+AVCodec ff_ralf_decoder;
+AVCodec ff_shorten_decoder;
+AVCodec ff_sipr_decoder;
+AVCodec ff_smackaud_decoder;
+AVCodec ff_sonic_encoder;
+AVCodec ff_sonic_decoder;
+AVCodec ff_sonic_ls_encoder;
+AVCodec ff_tak_decoder;
+AVCodec ff_truehd_decoder;
+AVCodec ff_truespeech_decoder;
+AVCodec ff_tta_encoder;
+AVCodec ff_tta_decoder;
+AVCodec ff_twinvq_decoder;
+AVCodec ff_vmdaudio_decoder;
+AVCodec ff_vorbis_encoder;
+AVCodec ff_vorbis_decoder;
+AVCodec ff_wavpack_encoder;
+AVCodec ff_wavpack_decoder;
+AVCodec ff_wmalossless_decoder;
+AVCodec ff_wmapro_decoder;
+AVCodec ff_wmav1_encoder;
+AVCodec ff_wmav1_decoder;
+AVCodec ff_wmav2_encoder;
+AVCodec ff_wmav2_decoder;
+AVCodec ff_wmavoice_decoder;
+AVCodec ff_ws_snd1_decoder;
+AVCodec ff_pcm_alaw_encoder;
+AVCodec ff_pcm_alaw_decoder;
+AVCodec ff_pcm_bluray_decoder;
+AVCodec ff_pcm_dvd_decoder;
+AVCodec ff_pcm_f32be_encoder;
+AVCodec ff_pcm_f32be_decoder;
+AVCodec ff_pcm_f32le_encoder;
+AVCodec ff_pcm_f32le_decoder;
+AVCodec ff_pcm_f64be_encoder;
+AVCodec ff_pcm_f64be_decoder;
+AVCodec ff_pcm_f64le_encoder;
+AVCodec ff_pcm_f64le_decoder;
+AVCodec ff_pcm_lxf_decoder;
+AVCodec ff_pcm_mulaw_encoder;
+AVCodec ff_pcm_mulaw_decoder;
+AVCodec ff_pcm_s8_encoder;
+AVCodec ff_pcm_s8_decoder;
+AVCodec ff_pcm_s8_planar_encoder;
+AVCodec ff_pcm_s8_planar_decoder;
+AVCodec ff_pcm_s16be_encoder;
+AVCodec ff_pcm_s16be_decoder;
+AVCodec ff_pcm_s16be_planar_encoder;
+AVCodec ff_pcm_s16be_planar_decoder;
+AVCodec ff_pcm_s16le_encoder;
+AVCodec ff_pcm_s16le_decoder;
+AVCodec ff_pcm_s16le_planar_encoder;
+AVCodec ff_pcm_s16le_planar_decoder;
+AVCodec ff_pcm_s24be_encoder;
+AVCodec ff_pcm_s24be_decoder;
+AVCodec ff_pcm_s24daud_encoder;
+AVCodec ff_pcm_s24daud_decoder;
+AVCodec ff_pcm_s24le_encoder;
+AVCodec ff_pcm_s24le_decoder;
+AVCodec ff_pcm_s24le_planar_encoder;
+AVCodec ff_pcm_s24le_planar_decoder;
+AVCodec ff_pcm_s32be_encoder;
+AVCodec ff_pcm_s32be_decoder;
+AVCodec ff_pcm_s32le_encoder;
+AVCodec ff_pcm_s32le_decoder;
+AVCodec ff_pcm_s32le_planar_encoder;
+AVCodec ff_pcm_s32le_planar_decoder;
+AVCodec ff_pcm_u8_encoder;
+AVCodec ff_pcm_u8_decoder;
+AVCodec ff_pcm_u16be_encoder;
+AVCodec ff_pcm_u16be_decoder;
+AVCodec ff_pcm_u16le_encoder;
+AVCodec ff_pcm_u16le_decoder;
+AVCodec ff_pcm_u24be_encoder;
+AVCodec ff_pcm_u24be_decoder;
+AVCodec ff_pcm_u24le_encoder;
+AVCodec ff_pcm_u24le_decoder;
+AVCodec ff_pcm_u32be_encoder;
+AVCodec ff_pcm_u32be_decoder;
+AVCodec ff_pcm_u32le_encoder;
+AVCodec ff_pcm_u32le_decoder;
+AVCodec ff_pcm_zork_decoder;
+AVCodec ff_interplay_dpcm_decoder;
+AVCodec ff_roq_dpcm_encoder;
+AVCodec ff_roq_dpcm_decoder;
+AVCodec ff_sol_dpcm_decoder;
+AVCodec ff_xan_dpcm_decoder;
+AVCodec ff_adpcm_4xm_decoder;
+AVCodec ff_adpcm_adx_encoder;
+AVCodec ff_adpcm_adx_decoder;
+AVCodec ff_adpcm_afc_decoder;
+AVCodec ff_adpcm_ct_decoder;
+AVCodec ff_adpcm_dtk_decoder;
+AVCodec ff_adpcm_ea_decoder;
+AVCodec ff_adpcm_ea_maxis_xa_decoder;
+AVCodec ff_adpcm_ea_r1_decoder;
+AVCodec ff_adpcm_ea_r2_decoder;
+AVCodec ff_adpcm_ea_r3_decoder;
+AVCodec ff_adpcm_ea_xas_decoder;
+AVCodec ff_adpcm_g722_encoder;
+AVCodec ff_adpcm_g722_decoder;
+AVCodec ff_adpcm_g726_encoder;
+AVCodec ff_adpcm_g726_decoder;
+AVCodec ff_adpcm_g726le_decoder;
+AVCodec ff_adpcm_ima_amv_decoder;
+AVCodec ff_adpcm_ima_apc_decoder;
+AVCodec ff_adpcm_ima_dk3_decoder;
+AVCodec ff_adpcm_ima_dk4_decoder;
+AVCodec ff_adpcm_ima_ea_eacs_decoder;
+AVCodec ff_adpcm_ima_ea_sead_decoder;
+AVCodec ff_adpcm_ima_iss_decoder;
+AVCodec ff_adpcm_ima_oki_decoder;
+AVCodec ff_adpcm_ima_qt_encoder;
+AVCodec ff_adpcm_ima_qt_decoder;
+AVCodec ff_adpcm_ima_rad_decoder;
+AVCodec ff_adpcm_ima_smjpeg_decoder;
+AVCodec ff_adpcm_ima_wav_encoder;
+AVCodec ff_adpcm_ima_wav_decoder;
+AVCodec ff_adpcm_ima_ws_decoder;
+AVCodec ff_adpcm_ms_encoder;
+AVCodec ff_adpcm_ms_decoder;
+AVCodec ff_adpcm_sbpro_2_decoder;
+AVCodec ff_adpcm_sbpro_3_decoder;
+AVCodec ff_adpcm_sbpro_4_decoder;
+AVCodec ff_adpcm_swf_encoder;
+AVCodec ff_adpcm_swf_decoder;
+AVCodec ff_adpcm_thp_decoder;
+AVCodec ff_adpcm_thp_le_decoder;
+AVCodec ff_adpcm_vima_decoder;
+AVCodec ff_adpcm_xa_decoder;
+AVCodec ff_adpcm_yamaha_encoder;
+AVCodec ff_adpcm_yamaha_decoder;
+AVCodec ff_vima_decoder;
+AVCodec ff_ssa_encoder;
+AVCodec ff_ssa_decoder;
+AVCodec ff_ass_encoder;
+AVCodec ff_ass_decoder;
+AVCodec ff_ccaption_decoder;
+AVCodec ff_dvbsub_encoder;
+AVCodec ff_dvbsub_decoder;
+AVCodec ff_dvdsub_encoder;
+AVCodec ff_dvdsub_decoder;
+AVCodec ff_jacosub_decoder;
+AVCodec ff_microdvd_decoder;
+AVCodec ff_movtext_encoder;
+AVCodec ff_movtext_decoder;
+AVCodec ff_mpl2_decoder;
+AVCodec ff_pgssub_decoder;
+AVCodec ff_pjs_decoder;
+AVCodec ff_realtext_decoder;
+AVCodec ff_sami_decoder;
+AVCodec ff_srt_encoder;
+AVCodec ff_srt_decoder;
+AVCodec ff_stl_decoder;
+AVCodec ff_subrip_encoder;
+AVCodec ff_subrip_decoder;
+AVCodec ff_subviewer_decoder;
+AVCodec ff_subviewer1_decoder;
+AVCodec ff_text_decoder;
+AVCodec ff_vplayer_decoder;
+AVCodec ff_webvtt_encoder;
+AVCodec ff_webvtt_decoder;
+AVCodec ff_xsub_encoder;
+AVCodec ff_xsub_decoder;
+AVCodec ff_libcelt_decoder;
+AVCodec ff_libdcadec_decoder;
+AVCodec ff_libfaac_encoder;
+AVCodec ff_libfdk_aac_encoder;
+AVCodec ff_libfdk_aac_decoder;
+AVCodec ff_libgsm_encoder;
+AVCodec ff_libgsm_decoder;
+AVCodec ff_libgsm_ms_encoder;
+AVCodec ff_libgsm_ms_decoder;
+AVCodec ff_libilbc_encoder;
+AVCodec ff_libilbc_decoder;
+AVCodec ff_libmp3lame_encoder;
+AVCodec ff_libopencore_amrnb_encoder;
+AVCodec ff_libopencore_amrnb_decoder;
+AVCodec ff_libopencore_amrwb_decoder;
+AVCodec ff_libopenjpeg_encoder;
+AVCodec ff_libopenjpeg_decoder;
+AVCodec ff_libopus_encoder;
+AVCodec ff_libopus_decoder;
+AVCodec ff_libschroedinger_encoder;
+AVCodec ff_libschroedinger_decoder;
+AVCodec ff_libshine_encoder;
+AVCodec ff_libspeex_encoder;
+AVCodec ff_libspeex_decoder;
+AVCodec ff_libstagefright_h264_decoder;
+AVCodec ff_libtheora_encoder;
+AVCodec ff_libtwolame_encoder;
+AVCodec ff_libutvideo_encoder;
+AVCodec ff_libutvideo_decoder;
+AVCodec ff_libvo_aacenc_encoder;
+AVCodec ff_libvo_amrwbenc_encoder;
+AVCodec ff_libvorbis_encoder;
+AVCodec ff_libvorbis_decoder;
+AVCodec ff_libvpx_vp8_encoder;
+AVCodec ff_libvpx_vp8_decoder;
+AVCodec ff_libvpx_vp9_encoder;
+AVCodec ff_libvpx_vp9_decoder;
+AVCodec ff_libwavpack_encoder;
+AVCodec ff_libwebp_anim_encoder;
+AVCodec ff_libwebp_encoder;
+AVCodec ff_libx262_encoder;
+AVCodec ff_libx264_encoder;
+AVCodec ff_libx264rgb_encoder;
+AVCodec ff_libx265_encoder;
+AVCodec ff_libxavs_encoder;
+AVCodec ff_libxvid_encoder;
+AVCodec ff_libzvbi_teletext_decoder;
+AVCodec ff_libaacplus_encoder;
+AVCodec ff_bintext_decoder;
+AVCodec ff_xbin_decoder;
+AVCodec ff_idf_decoder;
+AVCodec ff_libopenh264_encoder;
+AVCodec ff_h264_qsv_encoder;
+AVCodec ff_nvenc_encoder;
+AVCodec ff_nvenc_h264_encoder;
+AVCodec ff_nvenc_hevc_encoder;
+AVCodec ff_hevc_qsv_encoder;
+AVCodec ff_libkvazaar_encoder;
+AVCodec ff_mpeg2_qsv_encoder;
+AVCodec ff_dxv_decoder;
+AVCodec ff_mpeg2_mmal_decoder;
+AVCodec ff_rscc_decoder;
+AVCodec ff_screenpresso_decoder;
+AVCodec ff_sdx2_dpcm_decoder;
+AVCodec ff_vc1_mmal_decoder;
+AVCodec ff_wrapped_avframe_encoder;
+AVCodec ff_interplay_acm_decoder;
+AVCodec ff_xma1_decoder;
+AVCodec ff_xma2_decoder;
+AVCodec ff_adpcm_aica_decoder;
+AVCodec ff_adpcm_psx_decoder;
+AVCodec ff_text_encoder;
+AVCodec ff_vp9_cuvid_decoder;
+AVCodec ff_vp8_cuvid_decoder;
+AVCodec ff_vc1_cuvid_decoder;
+AVCodec ff_mjpeg_vaapi_encoder;
+AVCodec ff_hevc_vaapi_encoder;
+AVCodec ff_hevc_nvenc_encoder;
+AVCodec ff_hevc_cuvid_decoder;
+AVCodec ff_h264_videotoolbox_encoder;
+AVCodec ff_h264_vaapi_encoder;
+AVCodec ff_h264_omx_encoder;
+AVCodec ff_h264_nvenc_encoder;
+AVCodec ff_h264_cuvid_decoder;
+AVCodec ff_qdm2_at_decoder;
+AVCodec ff_qdmc_at_decoder;
+AVCodec ff_pcm_mulaw_at_decoder;
+AVCodec ff_pcm_mulaw_at_encoder;
+AVCodec ff_pcm_alaw_at_decoder;
+AVCodec ff_pcm_alaw_at_encoder;
+AVCodec ff_mp3_at_decoder;
+AVCodec ff_mp2_at_decoder;
+AVCodec ff_mp1_at_decoder;
+AVCodec ff_ilbc_at_decoder;
+AVCodec ff_ilbc_at_encoder;
+AVCodec ff_gsm_ms_at_decoder;
+AVCodec ff_eac3_at_decoder;
+AVCodec ff_amr_nb_at_decoder;
+AVCodec ff_alac_at_decoder;
+AVCodec ff_alac_at_encoder;
+AVCodec ff_adpcm_ima_qt_at_decoder;
+AVCodec ff_ac3_at_decoder;
+AVCodec ff_aac_at_decoder;
+AVCodec ff_aac_at_encoder;
+AVCodec ff_adpcm_mtaf_decoder;
+AVCodec ff_adpcm_ima_dat4_decoder;
+AVCodec ff_dst_decoder;
+AVCodec ff_ylc_decoder;
+AVCodec ff_truemotion2rt_decoder;
+AVCodec ff_sheervideo_decoder;
+AVCodec ff_magicyuv_decoder;
+AVCodec ff_m101_decoder;
+AVCodec ff_h264_mediacodec_decoder;
+
+AVCodecParser ff_aac_parser;
+AVCodecParser ff_aac_latm_parser;
+AVCodecParser ff_ac3_parser;
+AVCodecParser ff_adx_parser;
+AVCodecParser ff_bmp_parser;
+AVCodecParser ff_cavsvideo_parser;
+AVCodecParser ff_cook_parser;
+AVCodecParser ff_dca_parser;
+AVCodecParser ff_dirac_parser;
+AVCodecParser ff_dnxhd_parser;
+AVCodecParser ff_dpx_parser;
+AVCodecParser ff_dvaudio_parser;
+AVCodecParser ff_dvbsub_parser;
+AVCodecParser ff_dvdsub_parser;
+AVCodecParser ff_dvd_nav_parser;
+AVCodecParser ff_g729_parser;
+AVCodecParser ff_gsm_parser;
+AVCodecParser ff_h261_parser;
+AVCodecParser ff_h263_parser;
+AVCodecParser ff_h264_parser;
+AVCodecParser ff_hevc_parser;
+AVCodecParser ff_mjpeg_parser;
+AVCodecParser ff_mlp_parser;
+AVCodecParser ff_mpeg4video_parser;
+AVCodecParser ff_mpegaudio_parser;
+AVCodecParser ff_mpegvideo_parser;
+AVCodecParser ff_opus_parser;
+AVCodecParser ff_png_parser;
+AVCodecParser ff_pnm_parser;
+AVCodecParser ff_rv30_parser;
+AVCodecParser ff_rv40_parser;
+AVCodecParser ff_tak_parser;
+AVCodecParser ff_vc1_parser;
+AVCodecParser ff_vorbis_parser;
+AVCodecParser ff_vp3_parser;
+AVBitStreamFilter ff_aac_adtstoasc_bsf;
+AVBitStreamFilter ff_chomp_bsf;
+AVBitStreamFilter ff_dump_extradata_bsf;
+AVBitStreamFilter ff_h264_mp4toannexb_bsf;
+AVBitStreamFilter ff_hevc_mp4toannexb_bsf;
+AVBitStreamFilter ff_imx_dump_header_bsf;
+AVBitStreamFilter ff_mjpeg2jpeg_bsf;
+AVBitStreamFilter ff_mjpega_dump_header_bsf;
+AVBitStreamFilter ff_mp3_header_decompress_bsf;
+AVBitStreamFilter ff_mpeg4_unpack_bframes_bsf;
+AVBitStreamFilter ff_mov2textsub_bsf;
+AVBitStreamFilter ff_noise_bsf;
+AVBitStreamFilter ff_remove_extradata_bsf;
+AVBitStreamFilter ff_text2movsub_bsf;
+
+void ff_h264_pred_init_aarch64(H264PredContext *h, int codec_id,
+ const int bit_depth,
+ const int chroma_format_idc) {}
+void ff_h264_pred_init_arm(H264PredContext *h, int codec_id,
+ const int bit_depth, const int chroma_format_idc) {}
+void ff_h264_pred_init_mips(H264PredContext *h, int codec_id,
+ const int bit_depth, const int chroma_format_idc) {}
+void ff_me_cmp_init_static(void) {}
+int ff_frame_thread_encoder_init(AVCodecContext *avctx, AVDictionary *options) { return 0; }
+void ff_frame_thread_encoder_free(AVCodecContext *avctx) {}
+int ff_thread_video_encode_frame(AVCodecContext *avctx, AVPacket *pkt, const AVFrame *frame, int *got_packet_ptr) { return 0; }
+void ff_videodsp_init_aarch64(VideoDSPContext *ctx, int bpc) {}
+void ff_videodsp_init_arm(VideoDSPContext *ctx, int bpc) {}
+void ff_videodsp_init_ppc(VideoDSPContext *ctx, int bpc) {}
+void ff_vp7dsp_init(VP8DSPContext *c) {}
+void ff_vp78dsp_init_arm(VP8DSPContext *c) {}
+void ff_vp78dsp_init_ppc(VP8DSPContext *c) {}
+void ff_vp8dsp_init_arm(VP8DSPContext *c) {}
+void ff_vp8dsp_init_mips(VP8DSPContext *c) {}
+void ff_vp9dsp_init_mips(VP9DSPContext *dsp, int bpp) {}
+void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt, int channels, int bps) {}
+#if !defined(HAVE_64BIT_BUILD)
+void ff_flac_decorrelate_indep8_16_sse2(uint8_t **out, int32_t **in, int channels, int len, int shift) {}
+void ff_flac_decorrelate_indep8_32_avx(uint8_t **out, int32_t **in, int channels, int len, int shift) {}
+void ff_flac_decorrelate_indep8_16_avx(uint8_t **out, int32_t **in, int channels, int len, int shift) {}
+void ff_flac_decorrelate_indep8_32_sse2(uint8_t **out, int32_t **in, int channels, int len, int shift) {}
+#endif
+void av_bitstream_filter_close(AVBitStreamFilterContext *bsf) {}
+int av_bitstream_filter_filter(AVBitStreamFilterContext *bsfc,
+ AVCodecContext *avctx, const char *args,
+ uint8_t **poutbuf, int *poutbuf_size,
+ const uint8_t *buf, int buf_size, int keyframe) { return 0; }
+AVBitStreamFilterContext *av_bitstream_filter_init(const char *name) { return NULL;}
+AVBitStreamFilter *av_bitstream_filter_next(const AVBitStreamFilter *f) { return NULL; }
+void av_register_bitstream_filter(AVBitStreamFilter *bsf) {}
diff --git a/media/ffvpx/libavcodec/error_resilience.h b/media/ffvpx/libavcodec/error_resilience.h
new file mode 100644
index 000000000..d444ec3a3
--- /dev/null
+++ b/media/ffvpx/libavcodec/error_resilience.h
@@ -0,0 +1,96 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_ERROR_RESILIENCE_H
+#define AVCODEC_ERROR_RESILIENCE_H
+
+#include <stdint.h>
+
+#include "avcodec.h"
+#include "me_cmp.h"
+#include "thread.h"
+
+///< current MB is the first after a resync marker
+#define VP_START 1
+#define ER_AC_ERROR 2
+#define ER_DC_ERROR 4
+#define ER_MV_ERROR 8
+#define ER_AC_END 16
+#define ER_DC_END 32
+#define ER_MV_END 64
+
+#define ER_MB_ERROR (ER_AC_ERROR|ER_DC_ERROR|ER_MV_ERROR)
+#define ER_MB_END (ER_AC_END|ER_DC_END|ER_MV_END)
+
+typedef struct ERPicture {
+ AVFrame *f;
+ ThreadFrame *tf;
+
+ // it is the caller's responsibility to allocate these buffers
+ int16_t (*motion_val[2])[2];
+ int8_t *ref_index[2];
+
+ uint32_t *mb_type;
+ int field_picture;
+} ERPicture;
+
+typedef struct ERContext {
+ AVCodecContext *avctx;
+ MECmpContext mecc;
+ int mecc_inited;
+
+ int *mb_index2xy;
+ int mb_num;
+ int mb_width, mb_height;
+ int mb_stride;
+ int b8_stride;
+
+ volatile int error_count;
+ int error_occurred;
+ uint8_t *error_status_table;
+ uint8_t *er_temp_buffer;
+ int16_t *dc_val[3];
+ uint8_t *mbskip_table;
+ uint8_t *mbintra_table;
+ int mv[2][4][2];
+
+ ERPicture cur_pic;
+ ERPicture last_pic;
+ ERPicture next_pic;
+
+ AVBufferRef *ref_index_buf[2];
+ AVBufferRef *motion_val_buf[2];
+
+ uint16_t pp_time;
+ uint16_t pb_time;
+ int quarter_sample;
+ int partitioned_frame;
+ int ref_count;
+
+ void (*decode_mb)(void *opaque, int ref, int mv_dir, int mv_type,
+ int (*mv)[2][4][2],
+ int mb_x, int mb_y, int mb_intra, int mb_skipped);
+ void *opaque;
+} ERContext;
+
+void ff_er_frame_start(ERContext *s);
+void ff_er_frame_end(ERContext *s);
+void ff_er_add_slice(ERContext *s, int startx, int starty, int endx, int endy,
+ int status);
+
+#endif /* AVCODEC_ERROR_RESILIENCE_H */
diff --git a/media/ffvpx/libavcodec/fdctdsp.h b/media/ffvpx/libavcodec/fdctdsp.h
new file mode 100644
index 000000000..3e1f683b9
--- /dev/null
+++ b/media/ffvpx/libavcodec/fdctdsp.h
@@ -0,0 +1,37 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_FDCTDSP_H
+#define AVCODEC_FDCTDSP_H
+
+#include <stdint.h>
+
+#include "avcodec.h"
+
+typedef struct FDCTDSPContext {
+ void (*fdct)(int16_t *block /* align 16 */);
+ void (*fdct248)(int16_t *block /* align 16 */);
+} FDCTDSPContext;
+
+void ff_fdctdsp_init(FDCTDSPContext *c, AVCodecContext *avctx);
+void ff_fdctdsp_init_ppc(FDCTDSPContext *c, AVCodecContext *avctx,
+ unsigned high_bit_depth);
+void ff_fdctdsp_init_x86(FDCTDSPContext *c, AVCodecContext *avctx,
+ unsigned high_bit_depth);
+
+#endif /* AVCODEC_FDCTDSP_H */
diff --git a/media/ffvpx/libavcodec/ff_options_table.h b/media/ffvpx/libavcodec/ff_options_table.h
new file mode 100644
index 000000000..ee01275d3
--- /dev/null
+++ b/media/ffvpx/libavcodec/ff_options_table.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2001 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_OPTIONS_TABLE_H
+#define AVCODEC_OPTIONS_TABLE_H
+
+#include "libavutil/opt.h"
+static const AVOption avcodec_options[] = {
+{NULL},
+};
+
+#endif /* AVCODEC_OPTIONS_TABLE_H */
diff --git a/media/ffvpx/libavcodec/flac.c b/media/ffvpx/libavcodec/flac.c
new file mode 100644
index 000000000..f5154b914
--- /dev/null
+++ b/media/ffvpx/libavcodec/flac.c
@@ -0,0 +1,237 @@
+/*
+ * FLAC common code
+ * Copyright (c) 2009 Justin Ruggles
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/channel_layout.h"
+#include "libavutil/crc.h"
+#include "libavutil/log.h"
+#include "bytestream.h"
+#include "get_bits.h"
+#include "flac.h"
+#include "flacdata.h"
+
+static const int8_t sample_size_table[] = { 0, 8, 12, 0, 16, 20, 24, 0 };
+
+static const uint64_t flac_channel_layouts[8] = {
+ AV_CH_LAYOUT_MONO,
+ AV_CH_LAYOUT_STEREO,
+ AV_CH_LAYOUT_SURROUND,
+ AV_CH_LAYOUT_QUAD,
+ AV_CH_LAYOUT_5POINT0,
+ AV_CH_LAYOUT_5POINT1,
+ AV_CH_LAYOUT_6POINT1,
+ AV_CH_LAYOUT_7POINT1
+};
+
+static int64_t get_utf8(GetBitContext *gb)
+{
+ int64_t val;
+ GET_UTF8(val, get_bits(gb, 8), return -1;)
+ return val;
+}
+
+int ff_flac_decode_frame_header(AVCodecContext *avctx, GetBitContext *gb,
+ FLACFrameInfo *fi, int log_level_offset)
+{
+ int bs_code, sr_code, bps_code;
+
+ /* frame sync code */
+ if ((get_bits(gb, 15) & 0x7FFF) != 0x7FFC) {
+ av_log(avctx, AV_LOG_ERROR + log_level_offset, "invalid sync code\n");
+ return AVERROR_INVALIDDATA;
+ }
+
+ /* variable block size stream code */
+ fi->is_var_size = get_bits1(gb);
+
+ /* block size and sample rate codes */
+ bs_code = get_bits(gb, 4);
+ sr_code = get_bits(gb, 4);
+
+ /* channels and decorrelation */
+ fi->ch_mode = get_bits(gb, 4);
+ if (fi->ch_mode < FLAC_MAX_CHANNELS) {
+ fi->channels = fi->ch_mode + 1;
+ fi->ch_mode = FLAC_CHMODE_INDEPENDENT;
+ } else if (fi->ch_mode < FLAC_MAX_CHANNELS + FLAC_CHMODE_MID_SIDE) {
+ fi->channels = 2;
+ fi->ch_mode -= FLAC_MAX_CHANNELS - 1;
+ } else {
+ av_log(avctx, AV_LOG_ERROR + log_level_offset,
+ "invalid channel mode: %d\n", fi->ch_mode);
+ return AVERROR_INVALIDDATA;
+ }
+
+ /* bits per sample */
+ bps_code = get_bits(gb, 3);
+ if (bps_code == 3 || bps_code == 7) {
+ av_log(avctx, AV_LOG_ERROR + log_level_offset,
+ "invalid sample size code (%d)\n",
+ bps_code);
+ return AVERROR_INVALIDDATA;
+ }
+ fi->bps = sample_size_table[bps_code];
+
+ /* reserved bit */
+ if (get_bits1(gb)) {
+ av_log(avctx, AV_LOG_ERROR + log_level_offset,
+ "broken stream, invalid padding\n");
+ return AVERROR_INVALIDDATA;
+ }
+
+ /* sample or frame count */
+ fi->frame_or_sample_num = get_utf8(gb);
+ if (fi->frame_or_sample_num < 0) {
+ av_log(avctx, AV_LOG_ERROR + log_level_offset,
+ "sample/frame number invalid; utf8 fscked\n");
+ return AVERROR_INVALIDDATA;
+ }
+
+ /* blocksize */
+ if (bs_code == 0) {
+ av_log(avctx, AV_LOG_ERROR + log_level_offset,
+ "reserved blocksize code: 0\n");
+ return AVERROR_INVALIDDATA;
+ } else if (bs_code == 6) {
+ fi->blocksize = get_bits(gb, 8) + 1;
+ } else if (bs_code == 7) {
+ fi->blocksize = get_bits(gb, 16) + 1;
+ } else {
+ fi->blocksize = ff_flac_blocksize_table[bs_code];
+ }
+
+ /* sample rate */
+ if (sr_code < 12) {
+ fi->samplerate = ff_flac_sample_rate_table[sr_code];
+ } else if (sr_code == 12) {
+ fi->samplerate = get_bits(gb, 8) * 1000;
+ } else if (sr_code == 13) {
+ fi->samplerate = get_bits(gb, 16);
+ } else if (sr_code == 14) {
+ fi->samplerate = get_bits(gb, 16) * 10;
+ } else {
+ av_log(avctx, AV_LOG_ERROR + log_level_offset,
+ "illegal sample rate code %d\n",
+ sr_code);
+ return AVERROR_INVALIDDATA;
+ }
+
+ /* header CRC-8 check */
+ skip_bits(gb, 8);
+ if (av_crc(av_crc_get_table(AV_CRC_8_ATM), 0, gb->buffer,
+ get_bits_count(gb)/8)) {
+ av_log(avctx, AV_LOG_ERROR + log_level_offset,
+ "header crc mismatch\n");
+ return AVERROR_INVALIDDATA;
+ }
+
+ return 0;
+}
+
+int ff_flac_get_max_frame_size(int blocksize, int ch, int bps)
+{
+ /* Technically, there is no limit to FLAC frame size, but an encoder
+ should not write a frame that is larger than if verbatim encoding mode
+ were to be used. */
+
+ int count;
+
+ count = 16; /* frame header */
+ count += ch * ((7+bps+7)/8); /* subframe headers */
+ if (ch == 2) {
+ /* for stereo, need to account for using decorrelation */
+ count += (( 2*bps+1) * blocksize + 7) / 8;
+ } else {
+ count += ( ch*bps * blocksize + 7) / 8;
+ }
+ count += 2; /* frame footer */
+
+ return count;
+}
+
+int ff_flac_is_extradata_valid(AVCodecContext *avctx,
+ enum FLACExtradataFormat *format,
+ uint8_t **streaminfo_start)
+{
+ if (!avctx->extradata || avctx->extradata_size < FLAC_STREAMINFO_SIZE) {
+ av_log(avctx, AV_LOG_ERROR, "extradata NULL or too small.\n");
+ return 0;
+ }
+ if (AV_RL32(avctx->extradata) != MKTAG('f','L','a','C')) {
+ /* extradata contains STREAMINFO only */
+ if (avctx->extradata_size != FLAC_STREAMINFO_SIZE) {
+ av_log(avctx, AV_LOG_WARNING, "extradata contains %d bytes too many.\n",
+ FLAC_STREAMINFO_SIZE-avctx->extradata_size);
+ }
+ *format = FLAC_EXTRADATA_FORMAT_STREAMINFO;
+ *streaminfo_start = avctx->extradata;
+ } else {
+ if (avctx->extradata_size < 8+FLAC_STREAMINFO_SIZE) {
+ av_log(avctx, AV_LOG_ERROR, "extradata too small.\n");
+ return 0;
+ }
+ *format = FLAC_EXTRADATA_FORMAT_FULL_HEADER;
+ *streaminfo_start = &avctx->extradata[8];
+ }
+ return 1;
+}
+
+void ff_flac_set_channel_layout(AVCodecContext *avctx)
+{
+ if (avctx->channels <= FF_ARRAY_ELEMS(flac_channel_layouts))
+ avctx->channel_layout = flac_channel_layouts[avctx->channels - 1];
+ else
+ avctx->channel_layout = 0;
+}
+
+void ff_flac_parse_streaminfo(AVCodecContext *avctx, struct FLACStreaminfo *s,
+ const uint8_t *buffer)
+{
+ GetBitContext gb;
+ init_get_bits(&gb, buffer, FLAC_STREAMINFO_SIZE*8);
+
+ skip_bits(&gb, 16); /* skip min blocksize */
+ s->max_blocksize = get_bits(&gb, 16);
+ if (s->max_blocksize < FLAC_MIN_BLOCKSIZE) {
+ av_log(avctx, AV_LOG_WARNING, "invalid max blocksize: %d\n",
+ s->max_blocksize);
+ s->max_blocksize = 16;
+ }
+
+ skip_bits(&gb, 24); /* skip min frame size */
+ s->max_framesize = get_bits_long(&gb, 24);
+
+ s->samplerate = get_bits_long(&gb, 20);
+ s->channels = get_bits(&gb, 3) + 1;
+ s->bps = get_bits(&gb, 5) + 1;
+
+ avctx->channels = s->channels;
+ avctx->sample_rate = s->samplerate;
+ avctx->bits_per_raw_sample = s->bps;
+
+ if (!avctx->channel_layout ||
+ av_get_channel_layout_nb_channels(avctx->channel_layout) != avctx->channels)
+ ff_flac_set_channel_layout(avctx);
+
+ s->samples = get_bits64(&gb, 36);
+
+ skip_bits_long(&gb, 64); /* md5 sum */
+ skip_bits_long(&gb, 64); /* md5 sum */
+}
diff --git a/media/ffvpx/libavcodec/flac.h b/media/ffvpx/libavcodec/flac.h
new file mode 100644
index 000000000..96d971c9d
--- /dev/null
+++ b/media/ffvpx/libavcodec/flac.h
@@ -0,0 +1,153 @@
+/*
+ * FLAC (Free Lossless Audio Codec) decoder/demuxer common functions
+ * Copyright (c) 2008 Justin Ruggles
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * FLAC (Free Lossless Audio Codec) decoder/demuxer common functions
+ */
+
+#ifndef AVCODEC_FLAC_H
+#define AVCODEC_FLAC_H
+
+#include "avcodec.h"
+#include "bytestream.h"
+#include "get_bits.h"
+
+#define FLAC_STREAMINFO_SIZE 34
+#define FLAC_MAX_CHANNELS 8
+#define FLAC_MIN_BLOCKSIZE 16
+#define FLAC_MAX_BLOCKSIZE 65535
+#define FLAC_MIN_FRAME_SIZE 11
+
+enum {
+ FLAC_CHMODE_INDEPENDENT = 0,
+ FLAC_CHMODE_LEFT_SIDE = 1,
+ FLAC_CHMODE_RIGHT_SIDE = 2,
+ FLAC_CHMODE_MID_SIDE = 3,
+};
+
+enum {
+ FLAC_METADATA_TYPE_STREAMINFO = 0,
+ FLAC_METADATA_TYPE_PADDING,
+ FLAC_METADATA_TYPE_APPLICATION,
+ FLAC_METADATA_TYPE_SEEKTABLE,
+ FLAC_METADATA_TYPE_VORBIS_COMMENT,
+ FLAC_METADATA_TYPE_CUESHEET,
+ FLAC_METADATA_TYPE_PICTURE,
+ FLAC_METADATA_TYPE_INVALID = 127
+};
+
+enum FLACExtradataFormat {
+ FLAC_EXTRADATA_FORMAT_STREAMINFO = 0,
+ FLAC_EXTRADATA_FORMAT_FULL_HEADER = 1
+};
+
+#define FLACCOMMONINFO \
+ int samplerate; /**< sample rate */\
+ int channels; /**< number of channels */\
+ int bps; /**< bits-per-sample */\
+
+/**
+ * Data needed from the Streaminfo header for use by the raw FLAC demuxer
+ * and/or the FLAC decoder.
+ */
+#define FLACSTREAMINFO \
+ FLACCOMMONINFO \
+ int max_blocksize; /**< maximum block size, in samples */\
+ int max_framesize; /**< maximum frame size, in bytes */\
+ int64_t samples; /**< total number of samples */\
+
+typedef struct FLACStreaminfo {
+ FLACSTREAMINFO
+} FLACStreaminfo;
+
+typedef struct FLACFrameInfo {
+ FLACCOMMONINFO
+ int blocksize; /**< block size of the frame */
+ int ch_mode; /**< channel decorrelation mode */
+ int64_t frame_or_sample_num; /**< frame number or sample number */
+ int is_var_size; /**< specifies if the stream uses variable
+ block sizes or a fixed block size;
+ also determines the meaning of
+ frame_or_sample_num */
+} FLACFrameInfo;
+
+/**
+ * Parse the Streaminfo metadata block
+ * @param[out] avctx codec context to set basic stream parameters
+ * @param[out] s where parsed information is stored
+ * @param[in] buffer pointer to start of 34-byte streaminfo data
+ */
+void ff_flac_parse_streaminfo(AVCodecContext *avctx, struct FLACStreaminfo *s,
+ const uint8_t *buffer);
+
+/**
+ * Validate the FLAC extradata.
+ * @param[in] avctx codec context containing the extradata.
+ * @param[out] format extradata format.
+ * @param[out] streaminfo_start pointer to start of 34-byte STREAMINFO data.
+ * @return 1 if valid, 0 if not valid.
+ */
+int ff_flac_is_extradata_valid(AVCodecContext *avctx,
+ enum FLACExtradataFormat *format,
+ uint8_t **streaminfo_start);
+
+/**
+ * Calculate an estimate for the maximum frame size based on verbatim mode.
+ * @param blocksize block size, in samples
+ * @param ch number of channels
+ * @param bps bits-per-sample
+ */
+int ff_flac_get_max_frame_size(int blocksize, int ch, int bps);
+
+/**
+ * Validate and decode a frame header.
+ * @param avctx AVCodecContext to use as av_log() context
+ * @param gb GetBitContext from which to read frame header
+ * @param[out] fi frame information
+ * @param log_level_offset log level offset. can be used to silence error messages.
+ * @return non-zero on error, 0 if ok
+ */
+int ff_flac_decode_frame_header(AVCodecContext *avctx, GetBitContext *gb,
+ FLACFrameInfo *fi, int log_level_offset);
+
+void ff_flac_set_channel_layout(AVCodecContext *avctx);
+
+/**
+ * Parse the metadata block parameters from the header.
+ * @param[in] block_header header data, at least 4 bytes
+ * @param[out] last indicator for last metadata block
+ * @param[out] type metadata block type
+ * @param[out] size metadata block size
+ */
+static av_always_inline void flac_parse_block_header(const uint8_t *block_header,
+ int *last, int *type, int *size)
+{
+ int tmp = bytestream_get_byte(&block_header);
+ if (last)
+ *last = tmp & 0x80;
+ if (type)
+ *type = tmp & 0x7F;
+ if (size)
+ *size = bytestream_get_be24(&block_header);
+}
+
+#endif /* AVCODEC_FLAC_H */
diff --git a/media/ffvpx/libavcodec/flac_parser.c b/media/ffvpx/libavcodec/flac_parser.c
new file mode 100644
index 000000000..f5cc35a4f
--- /dev/null
+++ b/media/ffvpx/libavcodec/flac_parser.c
@@ -0,0 +1,755 @@
+/*
+ * FLAC parser
+ * Copyright (c) 2010 Michael Chinen
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * FLAC parser
+ *
+ * The FLAC parser buffers input until FLAC_MIN_HEADERS has been found.
+ * Each time it finds and verifies a CRC-8 header it sees which of the
+ * FLAC_MAX_SEQUENTIAL_HEADERS that came before it have a valid CRC-16 footer
+ * that ends at the newly found header.
+ * Headers are scored by FLAC_HEADER_BASE_SCORE plus the max of its crc-verified
+ * children, penalized by changes in sample rate, frame number, etc.
+ * The parser returns the frame with the highest score.
+ **/
+
+#include "libavutil/attributes.h"
+#include "libavutil/crc.h"
+#include "libavutil/fifo.h"
+#include "bytestream.h"
+#include "parser.h"
+#include "flac.h"
+
+/** maximum number of adjacent headers that compare CRCs against each other */
+#define FLAC_MAX_SEQUENTIAL_HEADERS 4
+/** minimum number of headers buffered and checked before returning frames */
+#define FLAC_MIN_HEADERS 10
+/** estimate for average size of a FLAC frame */
+#define FLAC_AVG_FRAME_SIZE 8192
+
+/** scoring settings for score_header */
+#define FLAC_HEADER_BASE_SCORE 10
+#define FLAC_HEADER_CHANGED_PENALTY 7
+#define FLAC_HEADER_CRC_FAIL_PENALTY 50
+#define FLAC_HEADER_NOT_PENALIZED_YET 100000
+#define FLAC_HEADER_NOT_SCORED_YET -100000
+
+/** largest possible size of flac header */
+#define MAX_FRAME_HEADER_SIZE 16
+
+typedef struct FLACHeaderMarker {
+ int offset; /**< byte offset from start of FLACParseContext->buffer */
+ int *link_penalty; /**< pointer to array of local scores between this header
+ and the one at a distance equal array position */
+ int max_score; /**< maximum score found after checking each child that
+ has a valid CRC */
+ FLACFrameInfo fi; /**< decoded frame header info */
+ struct FLACHeaderMarker *next; /**< next CRC-8 verified header that
+ immediately follows this one in
+ the bytestream */
+ struct FLACHeaderMarker *best_child; /**< following frame header with
+ which this frame has the best
+ score with */
+} FLACHeaderMarker;
+
+typedef struct FLACParseContext {
+ AVCodecParserContext *pc; /**< parent context */
+ AVCodecContext *avctx; /**< codec context pointer for logging */
+ FLACHeaderMarker *headers; /**< linked-list that starts at the first
+ CRC-8 verified header within buffer */
+ FLACHeaderMarker *best_header; /**< highest scoring header within buffer */
+ int nb_headers_found; /**< number of headers found in the last
+ flac_parse() call */
+ int nb_headers_buffered; /**< number of headers that are buffered */
+ int best_header_valid; /**< flag set when the parser returns junk;
+ if set return best_header next time */
+ AVFifoBuffer *fifo_buf; /**< buffer to store all data until headers
+ can be verified */
+ int end_padded; /**< specifies if fifo_buf's end is padded */
+ uint8_t *wrap_buf; /**< general fifo read buffer when wrapped */
+ int wrap_buf_allocated_size; /**< actual allocated size of the buffer */
+ FLACFrameInfo last_fi; /**< last decoded frame header info */
+ int last_fi_valid; /**< set if last_fi is valid */
+} FLACParseContext;
+
+static int frame_header_is_valid(AVCodecContext *avctx, const uint8_t *buf,
+ FLACFrameInfo *fi)
+{
+ GetBitContext gb;
+ init_get_bits(&gb, buf, MAX_FRAME_HEADER_SIZE * 8);
+ return !ff_flac_decode_frame_header(avctx, &gb, fi, 127);
+}
+
+/**
+ * Non-destructive fast fifo pointer fetching
+ * Returns a pointer from the specified offset.
+ * If possible the pointer points within the fifo buffer.
+ * Otherwise (if it would cause a wrap around,) a pointer to a user-specified
+ * buffer is used.
+ * The pointer can be NULL. In any case it will be reallocated to hold the size.
+ * If the returned pointer will be used after subsequent calls to flac_fifo_read_wrap
+ * then the subsequent calls should pass in a different wrap_buf so as to not
+ * overwrite the contents of the previous wrap_buf.
+ * This function is based on av_fifo_generic_read, which is why there is a comment
+ * about a memory barrier for SMP.
+ */
static uint8_t* flac_fifo_read_wrap(FLACParseContext *fpc, int offset, int len,
                                    uint8_t** wrap_buf, int* allocated_size)
{
    AVFifoBuffer *f = fpc->fifo_buf;
    uint8_t *start = f->rptr + offset;
    uint8_t *tmp_buf;

    /* Wrap the start pointer around the physical end of the ring buffer. */
    if (start >= f->end)
        start -= f->end - f->buffer;
    /* Fast path: the requested span is contiguous; return it in place. */
    if (f->end - start >= len)
        return start;

    /* Slow path: the span wraps, so linearize it into *wrap_buf.
       NOTE(review): av_fast_realloc takes an unsigned int *size; confirm
       passing an int* here is safe on all supported platforms. */
    tmp_buf = av_fast_realloc(*wrap_buf, allocated_size, len);

    if (!tmp_buf) {
        av_log(fpc->avctx, AV_LOG_ERROR,
               "couldn't reallocate wrap buffer of size %d", len);
        return NULL;
    }
    *wrap_buf = tmp_buf;
    do {
        /* Copy up to the wrap point, then continue from the buffer start. */
        int seg_len = FFMIN(f->end - start, len);
        memcpy(tmp_buf, start, seg_len);
        tmp_buf = (uint8_t*)tmp_buf + seg_len;
// memory barrier needed for SMP here in theory

        /* Advance; subtracting the buffer size wraps back to the start. */
        start += seg_len - (f->end - f->buffer);
        len -= seg_len;
    } while (len > 0);

    return *wrap_buf;
}
+
+/**
+ * Return a pointer in the fifo buffer where the offset starts at until
+ * the wrap point or end of request.
+ * len will contain the valid length of the returned buffer.
+ * A second call to flac_fifo_read (with new offset and len) should be called
+ * to get the post-wrap buf if the returned len is less than the requested.
+ **/
+static uint8_t* flac_fifo_read(FLACParseContext *fpc, int offset, int *len)
+{
+ AVFifoBuffer *f = fpc->fifo_buf;
+ uint8_t *start = f->rptr + offset;
+
+ if (start >= f->end)
+ start -= f->end - f->buffer;
+ *len = FFMIN(*len, f->end - start);
+ return start;
+}
+
/* Validate a candidate sync at the given fifo offset. If the frame header
   decodes, append a new FLACHeaderMarker to fpc->headers.
   Returns the resulting number of headers in the list (0 if the candidate
   was invalid) or AVERROR(ENOMEM) on allocation failure. */
static int find_headers_search_validate(FLACParseContext *fpc, int offset)
{
    FLACFrameInfo fi;
    uint8_t *header_buf;
    int size = 0;
    /* Linearize the (possibly wrapped) header bytes before decoding. */
    header_buf = flac_fifo_read_wrap(fpc, offset,
                                     MAX_FRAME_HEADER_SIZE,
                                     &fpc->wrap_buf,
                                     &fpc->wrap_buf_allocated_size);
    if (frame_header_is_valid(fpc->avctx, header_buf, &fi)) {
        FLACHeaderMarker **end_handle = &fpc->headers;
        int i;

        /* Walk to the tail of the list, counting existing headers. */
        size = 0;
        while (*end_handle) {
            end_handle = &(*end_handle)->next;
            size++;
        }

        *end_handle = av_mallocz(sizeof(**end_handle));
        if (!*end_handle) {
            av_log(fpc->avctx, AV_LOG_ERROR,
                   "couldn't allocate FLACHeaderMarker\n");
            return AVERROR(ENOMEM);
        }
        (*end_handle)->fi     = fi;
        (*end_handle)->offset = offset;
        (*end_handle)->link_penalty = av_malloc(sizeof(int) *
                                            FLAC_MAX_SEQUENTIAL_HEADERS);
        if (!(*end_handle)->link_penalty) {
            /* Roll back the half-constructed marker. */
            av_freep(end_handle);
            av_log(fpc->avctx, AV_LOG_ERROR,
                   "couldn't allocate link_penalty\n");
            return AVERROR(ENOMEM);
        }

        /* Penalties are computed lazily in score_header(). */
        for (i = 0; i < FLAC_MAX_SEQUENTIAL_HEADERS; i++)
            (*end_handle)->link_penalty[i] = FLAC_HEADER_NOT_PENALIZED_YET;

        fpc->nb_headers_found++;
        size++;
    }
    return size;
}
+
/* Scan buf for the FLAC frame sync pattern (0xFFF8/0xFFF9, i.e. the top 15
   bits set as 0xFFF8 after masking bit 0) and validate each candidate.
   search_start is the fifo offset corresponding to buf[0].
   Returns the latest header-count result from validation (may be 0). */
static int find_headers_search(FLACParseContext *fpc, uint8_t *buf, int buf_size,
                               int search_start)

{
    int size = 0, mod_offset = (buf_size - 1) % 4, i, j;
    uint32_t x;

    /* Handle the unaligned prefix byte-by-byte so the main loop can
       consume whole 32-bit words. */
    for (i = 0; i < mod_offset; i++) {
        if ((AV_RB16(buf + i) & 0xFFFE) == 0xFFF8)
            size = find_headers_search_validate(fpc, search_start + i);
    }

    for (; i < buf_size - 1; i += 4) {
        x = AV_RB32(buf + i);
        /* (x & ~(x + 0x01010101)) & 0x80808080 is nonzero iff at least one
           byte of x equals 0xFF: only 0xFF overflows to clear its own high
           bit in x + 0x01010101. Cheap prefilter for the sync pattern. */
        if (((x & ~(x + 0x01010101)) & 0x80808080)) {
            for (j = 0; j < 4; j++) {
                if ((AV_RB16(buf + i + j) & 0xFFFE) == 0xFFF8)
                    size = find_headers_search_validate(fpc, search_start + i + j);
            }
        }
    }
    return size;
}
+
/* Search the fifo from search_start for new frame headers, taking care of
   the candidate that straddles the fifo wrap point.
   Returns the number of headers in fpc->headers, or a negative value if a
   validation step failed to allocate. */
static int find_new_headers(FLACParseContext *fpc, int search_start)
{
    FLACHeaderMarker *end;
    int search_end, size = 0, read_len, temp;
    uint8_t *buf;
    fpc->nb_headers_found = 0;

    /* Search for a new header of at most 16 bytes. */
    search_end = av_fifo_size(fpc->fifo_buf) - (MAX_FRAME_HEADER_SIZE - 1);
    read_len   = search_end - search_start + 1;
    buf        = flac_fifo_read(fpc, search_start, &read_len);
    size       = find_headers_search(fpc, buf, read_len, search_start);
    search_start += read_len - 1;

    /* If fifo end was hit do the wrap around. */
    if (search_start != search_end) {
        uint8_t wrap[2];

        /* Check the 2-byte sync candidate that spans the wrap boundary. */
        wrap[0]  = buf[read_len - 1];
        read_len = search_end - search_start + 1;

        /* search_start + 1 is the post-wrap offset in the fifo. */
        buf      = flac_fifo_read(fpc, search_start + 1, &read_len);
        wrap[1]  = buf[0];

        if ((AV_RB16(wrap) & 0xFFFE) == 0xFFF8) {
            temp = find_headers_search_validate(fpc, search_start);
            size = FFMAX(size, temp);
        }
        search_start++;

        /* Continue to do the last half of the wrap. */
        temp          = find_headers_search(fpc, buf, read_len, search_start);
        size          = FFMAX(size, temp);
        search_start += read_len - 1;
    }

    /* Return the size even if no new headers were found. */
    if (!size && fpc->headers)
        for (end = fpc->headers; end; end = end->next)
            size++;
    return size;
}
+
+static int check_header_fi_mismatch(FLACParseContext *fpc,
+ FLACFrameInfo *header_fi,
+ FLACFrameInfo *child_fi,
+ int log_level_offset)
+{
+ int deduction = 0;
+ if (child_fi->samplerate != header_fi->samplerate) {
+ deduction += FLAC_HEADER_CHANGED_PENALTY;
+ av_log(fpc->avctx, AV_LOG_WARNING + log_level_offset,
+ "sample rate change detected in adjacent frames\n");
+ }
+ if (child_fi->bps != header_fi->bps) {
+ deduction += FLAC_HEADER_CHANGED_PENALTY;
+ av_log(fpc->avctx, AV_LOG_WARNING + log_level_offset,
+ "bits per sample change detected in adjacent frames\n");
+ }
+ if (child_fi->is_var_size != header_fi->is_var_size) {
+ /* Changing blocking strategy not allowed per the spec */
+ deduction += FLAC_HEADER_BASE_SCORE;
+ av_log(fpc->avctx, AV_LOG_WARNING + log_level_offset,
+ "blocking strategy change detected in adjacent frames\n");
+ }
+ if (child_fi->channels != header_fi->channels) {
+ deduction += FLAC_HEADER_CHANGED_PENALTY;
+ av_log(fpc->avctx, AV_LOG_WARNING + log_level_offset,
+ "number of channels change detected in adjacent frames\n");
+ }
+ return deduction;
+}
+
/* Score the plausibility of `child` directly following `header`.
   Combines parameter-change penalties, frame/sample-number continuity and,
   when the pair looks suspicious, a CRC-16 check over the bytes between the
   two headers. Returns the total penalty (0 means a clean link). */
static int check_header_mismatch(FLACParseContext *fpc,
                                 FLACHeaderMarker *header,
                                 FLACHeaderMarker *child,
                                 int log_level_offset)
{
    FLACFrameInfo *header_fi = &header->fi, *child_fi = &child->fi;
    int deduction, deduction_expected = 0, i;
    deduction = check_header_fi_mismatch(fpc, header_fi, child_fi,
                                         log_level_offset);
    /* Check sample and frame numbers. */
    if ((child_fi->frame_or_sample_num - header_fi->frame_or_sample_num
         != header_fi->blocksize) &&
        (child_fi->frame_or_sample_num
         != header_fi->frame_or_sample_num + 1)) {
        FLACHeaderMarker *curr;
        int expected_frame_num, expected_sample_num;
        /* If there are frames in the middle we expect this deduction,
           as they are probably valid and this one follows it */

        expected_frame_num = expected_sample_num = header_fi->frame_or_sample_num;
        curr = header;
        while (curr != child) {
            /* Ignore frames that failed all crc checks */
            for (i = 0; i < FLAC_MAX_SEQUENTIAL_HEADERS; i++) {
                if (curr->link_penalty[i] < FLAC_HEADER_CRC_FAIL_PENALTY) {
                    expected_frame_num++;
                    expected_sample_num += curr->fi.blocksize;
                    break;
                }
            }
            curr = curr->next;
        }

        /* A gap explained by valid intermediate frames is not penalized
           (unless some other mismatch already was). */
        if (expected_frame_num == child_fi->frame_or_sample_num ||
            expected_sample_num == child_fi->frame_or_sample_num)
            deduction_expected = deduction ? 0 : 1;

        deduction += FLAC_HEADER_CHANGED_PENALTY;
        av_log(fpc->avctx, AV_LOG_WARNING + log_level_offset,
               "sample/frame number mismatch in adjacent frames\n");
    }

    /* If we have suspicious headers, check the CRC between them */
    if (deduction && !deduction_expected) {
        FLACHeaderMarker *curr;
        int read_len;
        uint8_t *buf;
        uint32_t crc = 1;
        int inverted_test = 0;

        /* Since CRC is expensive only do it if we haven't yet.
           This assumes a CRC penalty is greater than all other check penalties */
        /* i ends up as the link distance (0-based) from header to child. */
        curr = header->next;
        for (i = 0; i < FLAC_MAX_SEQUENTIAL_HEADERS && curr != child; i++)
            curr = curr->next;

        if (header->link_penalty[i] < FLAC_HEADER_CRC_FAIL_PENALTY ||
            header->link_penalty[i] == FLAC_HEADER_NOT_PENALIZED_YET) {
            FLACHeaderMarker *start, *end;

            /* Although overlapping chains are scored, the crc should never
               have to be computed twice for a single byte. */
            start = header;
            end   = child;
            /* If a shorter overlapping span already failed its CRC, test
               only the complementary sub-span and invert the result. */
            if (i > 0 &&
                header->link_penalty[i - 1] >= FLAC_HEADER_CRC_FAIL_PENALTY) {
                while (start->next != child)
                    start = start->next;
                inverted_test = 1;
            } else if (i > 0 &&
                       header->next->link_penalty[i-1] >=
                       FLAC_HEADER_CRC_FAIL_PENALTY ) {
                end = header->next;
                inverted_test = 1;
            }

            /* CRC the span, in two pieces if it wraps around the fifo. */
            read_len = end->offset - start->offset;
            buf      = flac_fifo_read(fpc, start->offset, &read_len);
            crc      = av_crc(av_crc_get_table(AV_CRC_16_ANSI), 0, buf, read_len);
            read_len = (end->offset - start->offset) - read_len;

            if (read_len) {
                buf = flac_fifo_read(fpc, end->offset - read_len, &read_len);
                crc = av_crc(av_crc_get_table(AV_CRC_16_ANSI), crc, buf, read_len);
            }
        }

        /* A valid FLAC frame's running CRC-16 is zero over frame+footer;
           inverted_test flips the pass/fail sense (see above). */
        if (!crc ^ !inverted_test) {
            deduction += FLAC_HEADER_CRC_FAIL_PENALTY;
            av_log(fpc->avctx, AV_LOG_WARNING + log_level_offset,
                   "crc check failed from offset %i (frame %"PRId64") to %i (frame %"PRId64")\n",
                   header->offset, header_fi->frame_or_sample_num,
                   child->offset, child_fi->frame_or_sample_num);
        }
    }
    return deduction;
}
+
+/**
+ * Score a header.
+ *
+ * Give FLAC_HEADER_BASE_SCORE points to a frame for existing.
+ * If it has children, (subsequent frames of which the preceding CRC footer
+ * validates against this one,) then take the maximum score of the children,
+ * with a penalty of FLAC_HEADER_CHANGED_PENALTY applied for each change to
+ * bps, sample rate, channels, but not decorrelation mode, or blocksize,
+ * because it can change often.
+ **/
+static int score_header(FLACParseContext *fpc, FLACHeaderMarker *header)
+{
+ FLACHeaderMarker *child;
+ int dist = 0;
+ int child_score;
+ int base_score = FLAC_HEADER_BASE_SCORE;
+ if (header->max_score != FLAC_HEADER_NOT_SCORED_YET)
+ return header->max_score;
+
+ /* Modify the base score with changes from the last output header */
+ if (fpc->last_fi_valid) {
+ /* Silence the log since this will be repeated if selected */
+ base_score -= check_header_fi_mismatch(fpc, &fpc->last_fi, &header->fi,
+ AV_LOG_DEBUG);
+ }
+
+ header->max_score = base_score;
+
+ /* Check and compute the children's scores. */
+ child = header->next;
+ for (dist = 0; dist < FLAC_MAX_SEQUENTIAL_HEADERS && child; dist++) {
+ /* Look at the child's frame header info and penalize suspicious
+ changes between the headers. */
+ if (header->link_penalty[dist] == FLAC_HEADER_NOT_PENALIZED_YET) {
+ header->link_penalty[dist] = check_header_mismatch(fpc, header,
+ child, AV_LOG_DEBUG);
+ }
+ child_score = score_header(fpc, child) - header->link_penalty[dist];
+
+ if (FLAC_HEADER_BASE_SCORE + child_score > header->max_score) {
+ /* Keep the child because the frame scoring is dynamic. */
+ header->best_child = child;
+ header->max_score = base_score + child_score;
+ }
+ child = child->next;
+ }
+
+ return header->max_score;
+}
+
+static void score_sequences(FLACParseContext *fpc)
+{
+ FLACHeaderMarker *curr;
+ int best_score = 0;//FLAC_HEADER_NOT_SCORED_YET;
+ /* First pass to clear all old scores. */
+ for (curr = fpc->headers; curr; curr = curr->next)
+ curr->max_score = FLAC_HEADER_NOT_SCORED_YET;
+
+ /* Do a second pass to score them all. */
+ for (curr = fpc->headers; curr; curr = curr->next) {
+ if (score_header(fpc, curr) > best_score) {
+ fpc->best_header = curr;
+ best_score = curr->max_score;
+ }
+ }
+}
+
/* Emit the current best header's frame via *poutbuf/*poutbuf_size, update
   codec parameters (channels/layout/sample_rate), duration and optionally
   pts, and remember its frame info for scoring the next sequence.
   Returns 0, or the negative overread index when a child frame follows. */
static int get_best_header(FLACParseContext* fpc, const uint8_t **poutbuf,
                           int *poutbuf_size)
{
    FLACHeaderMarker *header = fpc->best_header;
    FLACHeaderMarker *child  = header->best_child;
    if (!child) {
        /* No following header: the frame runs to the end of the buffer. */
        *poutbuf_size = av_fifo_size(fpc->fifo_buf) - header->offset;
    } else {
        *poutbuf_size = child->offset - header->offset;

        /* If the child has suspicious changes, log them */
        check_header_mismatch(fpc, header, child, 0);
    }

    if (header->fi.channels != fpc->avctx->channels ||
        !fpc->avctx->channel_layout) {
        fpc->avctx->channels = header->fi.channels;
        ff_flac_set_channel_layout(fpc->avctx);
    }
    fpc->avctx->sample_rate = header->fi.samplerate;
    fpc->pc->duration       = header->fi.blocksize;
    *poutbuf = flac_fifo_read_wrap(fpc, header->offset, *poutbuf_size,
                                   &fpc->wrap_buf,
                                   &fpc->wrap_buf_allocated_size);


    if (fpc->pc->flags & PARSER_FLAG_USE_CODEC_TS){
        /* Variable blocksize streams number frames by sample; fixed ones by
           frame index, so convert to samples via the blocksize. */
        if (header->fi.is_var_size)
          fpc->pc->pts = header->fi.frame_or_sample_num;
        else if (header->best_child)
          fpc->pc->pts = header->fi.frame_or_sample_num * header->fi.blocksize;
    }

    fpc->best_header_valid = 0;
    fpc->last_fi_valid = 1;
    fpc->last_fi = header->fi;

    /* Return the negative overread index so the client can compute pos.
       This should be the amount overread to the beginning of the child */
    if (child)
        return child->offset - av_fifo_size(fpc->fifo_buf);
    return 0;
}
+
/* Main parser callback: buffer input in the fifo until FLAC_MIN_HEADERS
   verified headers are available, then emit the highest-scoring frame.
   Returns the number of input bytes consumed. */
static int flac_parse(AVCodecParserContext *s, AVCodecContext *avctx,
                      const uint8_t **poutbuf, int *poutbuf_size,
                      const uint8_t *buf, int buf_size)
{
    FLACParseContext *fpc = s->priv_data;
    FLACHeaderMarker *curr;
    int nb_headers;
    const uint8_t *read_end = buf;
    const uint8_t *read_start = buf;

    /* Demuxer already delivers whole frames: just probe the header for
       duration/sample_rate/pts and pass the buffer through unchanged. */
    if (s->flags & PARSER_FLAG_COMPLETE_FRAMES) {
        FLACFrameInfo fi;
        if (frame_header_is_valid(avctx, buf, &fi)) {
            s->duration = fi.blocksize;
            if (!avctx->sample_rate)
                avctx->sample_rate = fi.samplerate;
            if (fpc->pc->flags & PARSER_FLAG_USE_CODEC_TS){
                fpc->pc->pts = fi.frame_or_sample_num;
                if (!fi.is_var_size)
                  fpc->pc->pts *= fi.blocksize;
            }
        }
        *poutbuf = buf;
        *poutbuf_size = buf_size;
        return buf_size;
    }

    fpc->avctx = avctx;
    /* A junk frame was returned last call; emit the deferred best frame. */
    if (fpc->best_header_valid)
        return get_best_header(fpc, poutbuf, poutbuf_size);

    /* If a best_header was found last call remove it with the buffer data. */
    if (fpc->best_header && fpc->best_header->best_child) {
        FLACHeaderMarker *temp;
        FLACHeaderMarker *best_child = fpc->best_header->best_child;

        /* Remove headers in list until the end of the best_header. */
        for (curr = fpc->headers; curr != best_child; curr = temp) {
            if (curr != fpc->best_header) {
                av_log(avctx, AV_LOG_DEBUG,
                       "dropping low score %i frame header from offset %i to %i\n",
                       curr->max_score, curr->offset, curr->next->offset);
            }
            temp = curr->next;
            av_freep(&curr->link_penalty);
            av_free(curr);
            fpc->nb_headers_buffered--;
        }
        /* Release returned data from ring buffer. */
        av_fifo_drain(fpc->fifo_buf, best_child->offset);

        /* Fix the offset for the headers remaining to match the new buffer. */
        for (curr = best_child->next; curr; curr = curr->next)
            curr->offset -= best_child->offset;

        fpc->nb_headers_buffered--;
        best_child->offset = 0;
        fpc->headers       = best_child;
        if (fpc->nb_headers_buffered >= FLAC_MIN_HEADERS) {
            fpc->best_header = best_child;
            return get_best_header(fpc, poutbuf, poutbuf_size);
        }
        fpc->best_header = NULL;
    } else if (fpc->best_header) {
        /* No end frame no need to delete the buffer; probably eof */
        FLACHeaderMarker *temp;

        for (curr = fpc->headers; curr != fpc->best_header; curr = temp) {
            temp = curr->next;
            av_freep(&curr->link_penalty);
            av_free(curr);
        }
        fpc->headers = fpc->best_header->next;
        av_freep(&fpc->best_header->link_penalty);
        av_freep(&fpc->best_header);
    }

    /* Find and score new headers.                                     */
    /* buf_size is to zero when padding, so check for this since we do */
    /* not want to try to read more input once we have found the end.  */
    /* Note that as (non-modified) parameters, buf can be non-NULL,    */
    /* while buf_size is 0.                                            */
    while ((buf && buf_size && read_end < buf + buf_size &&
            fpc->nb_headers_buffered < FLAC_MIN_HEADERS)
           || ((!buf || !buf_size) && !fpc->end_padded)) {
        int start_offset;

        /* Pad the end once if EOF, to check the final region for headers. */
        if (!buf || !buf_size) {
            fpc->end_padded      = 1;
            buf_size = MAX_FRAME_HEADER_SIZE;
            read_end = read_start + MAX_FRAME_HEADER_SIZE;
        } else {
            /* The maximum read size is the upper-bound of what the parser
               needs to have the required number of frames buffered */
            int nb_desired = FLAC_MIN_HEADERS - fpc->nb_headers_buffered + 1;
            read_end       = read_end + FFMIN(buf + buf_size - read_end,
                                              nb_desired * FLAC_AVG_FRAME_SIZE);
        }

        if (!av_fifo_space(fpc->fifo_buf) &&
            av_fifo_size(fpc->fifo_buf) / FLAC_AVG_FRAME_SIZE >
            fpc->nb_headers_buffered * 20) {
            /* There is less than one valid flac header buffered for 20 headers
             * buffered. Therefore the fifo is most likely filled with invalid
             * data and the input is not a flac file. */
            goto handle_error;
        }

        /* Fill the buffer. */
        if (   av_fifo_space(fpc->fifo_buf) < read_end - read_start
            && av_fifo_realloc2(fpc->fifo_buf, (read_end - read_start) + 2*av_fifo_size(fpc->fifo_buf)) < 0) {
            av_log(avctx, AV_LOG_ERROR,
                   "couldn't reallocate buffer of size %"PTRDIFF_SPECIFIER"\n",
                   (read_end - read_start) + av_fifo_size(fpc->fifo_buf));
            goto handle_error;
        }

        if (buf && buf_size) {
            av_fifo_generic_write(fpc->fifo_buf, (void*) read_start,
                                  read_end - read_start, NULL);
        } else {
            /* EOF: append zero padding so trailing headers can be found. */
            int8_t pad[MAX_FRAME_HEADER_SIZE] = { 0 };
            av_fifo_generic_write(fpc->fifo_buf, (void*) pad, sizeof(pad), NULL);
        }

        /* Tag headers and update sequences. */
        start_offset = av_fifo_size(fpc->fifo_buf) -
                       ((read_end - read_start) + (MAX_FRAME_HEADER_SIZE - 1));
        start_offset = FFMAX(0, start_offset);
        nb_headers   = find_new_headers(fpc, start_offset);

        if (nb_headers < 0) {
            av_log(avctx, AV_LOG_ERROR,
                   "find_new_headers couldn't allocate FLAC header\n");
            goto handle_error;
        }

        fpc->nb_headers_buffered = nb_headers;
        /* Wait till FLAC_MIN_HEADERS to output a valid frame. */
        if (!fpc->end_padded && fpc->nb_headers_buffered < FLAC_MIN_HEADERS) {
            if (buf && read_end < buf + buf_size) {
                read_start = read_end;
                continue;
            } else {
                goto handle_error;
            }
        }

        /* If headers found, update the scores since we have longer chains. */
        if (fpc->end_padded || fpc->nb_headers_found)
            score_sequences(fpc);

        /* restore the state pre-padding */
        if (fpc->end_padded) {
            /* HACK: rewind the fifo write pointer to drop the padding,
               reaching into AVFifoBuffer internals; handle the case where
               the write pointer itself wrapped inside the padding. */
            int warp = fpc->fifo_buf->wptr - fpc->fifo_buf->buffer < MAX_FRAME_HEADER_SIZE;
            /* HACK: drain the tail of the fifo */
            fpc->fifo_buf->wptr -= MAX_FRAME_HEADER_SIZE;
            fpc->fifo_buf->wndx -= MAX_FRAME_HEADER_SIZE;
            if (warp) {
                fpc->fifo_buf->wptr += fpc->fifo_buf->end -
                    fpc->fifo_buf->buffer;
            }
            buf_size = 0;
            read_start = read_end = NULL;
        }
    }

    for (curr = fpc->headers; curr; curr = curr->next) {
        if (curr->max_score > 0 &&
            (!fpc->best_header || curr->max_score > fpc->best_header->max_score)) {
            fpc->best_header = curr;
        }
    }

    if (fpc->best_header) {
        fpc->best_header_valid = 1;
        if (fpc->best_header->offset > 0) {
            /* Output a junk frame. */
            av_log(avctx, AV_LOG_DEBUG, "Junk frame till offset %i\n",
                   fpc->best_header->offset);

            /* Set duration to 0. It is unknown or invalid in a junk frame. */
            s->duration = 0;
            *poutbuf_size     = fpc->best_header->offset;
            *poutbuf          = flac_fifo_read_wrap(fpc, 0, *poutbuf_size,
                                                    &fpc->wrap_buf,
                                                    &fpc->wrap_buf_allocated_size);
            return buf_size ? (read_end - buf) : (fpc->best_header->offset -
                                                  av_fifo_size(fpc->fifo_buf));
        }
        if (!buf_size)
            return get_best_header(fpc, poutbuf, poutbuf_size);
    }

handle_error:
    *poutbuf      = NULL;
    *poutbuf_size = 0;
    return buf_size ? read_end - buf : 0;
}
+
+static av_cold int flac_parse_init(AVCodecParserContext *c)
+{
+ FLACParseContext *fpc = c->priv_data;
+ fpc->pc = c;
+ /* There will generally be FLAC_MIN_HEADERS buffered in the fifo before
+ it drains. This is allocated early to avoid slow reallocation. */
+ fpc->fifo_buf = av_fifo_alloc_array(FLAC_MIN_HEADERS + 3, FLAC_AVG_FRAME_SIZE);
+ if (!fpc->fifo_buf) {
+ av_log(fpc->avctx, AV_LOG_ERROR,
+ "couldn't allocate fifo_buf\n");
+ return AVERROR(ENOMEM);
+ }
+ return 0;
+}
+
+static void flac_parse_close(AVCodecParserContext *c)
+{
+ FLACParseContext *fpc = c->priv_data;
+ FLACHeaderMarker *curr = fpc->headers, *temp;
+
+ while (curr) {
+ temp = curr->next;
+ av_freep(&curr->link_penalty);
+ av_free(curr);
+ curr = temp;
+ }
+ av_fifo_freep(&fpc->fifo_buf);
+ av_freep(&fpc->wrap_buf);
+}
+
/* Parser registration entry: splits a raw FLAC byte stream into
   frame-aligned packets for the decoder. */
AVCodecParser ff_flac_parser = {
    .codec_ids      = { AV_CODEC_ID_FLAC },
    .priv_data_size = sizeof(FLACParseContext),
    .parser_init    = flac_parse_init,
    .parser_parse   = flac_parse,
    .parser_close   = flac_parse_close,
};
diff --git a/media/ffvpx/libavcodec/flacdata.c b/media/ffvpx/libavcodec/flacdata.c
new file mode 100644
index 000000000..1954f32d3
--- /dev/null
+++ b/media/ffvpx/libavcodec/flacdata.c
@@ -0,0 +1,33 @@
+/*
+ * FLAC data
+ * Copyright (c) 2003 Alex Beregszaszi
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "internal.h"
+
/* Sample rates indexed by the 4-bit code in the frame header;
   0 marks codes that take the rate from STREAMINFO or are invalid. */
const int ff_flac_sample_rate_table[16] = {
        0,
    88200, 176400, 192000,
     8000,  16000,  22050, 24000, 32000, 44100, 48000, 96000,
        0,      0,      0,     0
};

/* Block sizes indexed by the 4-bit code in the frame header;
   0 marks reserved codes and the "get N from the header" escapes. */
const int32_t ff_flac_blocksize_table[16] = {
      0,  192,  576, 1152, 2304, 4608,     0,     0,
    256,  512, 1024, 2048, 4096, 8192, 16384, 32768
};
diff --git a/media/ffvpx/libavcodec/flacdata.h b/media/ffvpx/libavcodec/flacdata.h
new file mode 100644
index 000000000..e2c1e5d7f
--- /dev/null
+++ b/media/ffvpx/libavcodec/flacdata.h
@@ -0,0 +1,31 @@
+/*
+ * FLAC data header
+ * Copyright (c) 2003 Alex Beregszaszi
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_FLACDATA_H
+#define AVCODEC_FLACDATA_H
+
+#include "internal.h"
+
+extern const int ff_flac_sample_rate_table[16];
+
+extern const int32_t ff_flac_blocksize_table[16];
+
+#endif /* AVCODEC_FLACDATA_H */
diff --git a/media/ffvpx/libavcodec/flacdec.c b/media/ffvpx/libavcodec/flacdec.c
new file mode 100644
index 000000000..b7237e18f
--- /dev/null
+++ b/media/ffvpx/libavcodec/flacdec.c
@@ -0,0 +1,677 @@
+/*
+ * FLAC (Free Lossless Audio Codec) decoder
+ * Copyright (c) 2003 Alex Beregszaszi
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * FLAC (Free Lossless Audio Codec) decoder
+ * @author Alex Beregszaszi
+ * @see http://flac.sourceforge.net/
+ *
+ * This decoder can be used in 1 of 2 ways: Either raw FLAC data can be fed
+ * through, starting from the initial 'fLaC' signature; or by passing the
+ * 34-byte streaminfo structure through avctx->extradata[_size] followed
+ * by data starting with the 0xFFF8 marker.
+ */
+
+#include <limits.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/crc.h"
+#include "libavutil/opt.h"
+#include "avcodec.h"
+#include "internal.h"
+#include "get_bits.h"
+#include "bytestream.h"
+#include "golomb.h"
+#include "flac.h"
+#include "flacdata.h"
+#include "flacdsp.h"
+#include "thread.h"
+#include "unary.h"
+
+
+typedef struct FLACContext {
+ AVClass *class;
+ struct FLACStreaminfo flac_stream_info;
+
+ AVCodecContext *avctx; ///< parent AVCodecContext
+ GetBitContext gb; ///< GetBitContext initialized to start at the current frame
+
+ int blocksize; ///< number of samples in the current frame
+ int sample_shift; ///< shift required to make output samples 16-bit or 32-bit
+ int ch_mode; ///< channel decorrelation type in the current frame
+ int got_streaminfo; ///< indicates if the STREAMINFO has been read
+
+ int32_t *decoded[FLAC_MAX_CHANNELS]; ///< decoded samples
+ uint8_t *decoded_buffer;
+ unsigned int decoded_buffer_size;
+ int buggy_lpc; ///< use workaround for old lavc encoded files
+
+ FLACDSPContext dsp;
+} FLACContext;
+
+static int allocate_buffers(FLACContext *s);
+
+static void flac_set_bps(FLACContext *s)
+{
+ enum AVSampleFormat req = s->avctx->request_sample_fmt;
+ int need32 = s->flac_stream_info.bps > 16;
+ int want32 = av_get_bytes_per_sample(req) > 2;
+ int planar = av_sample_fmt_is_planar(req);
+
+ if (need32 || want32) {
+ if (planar)
+ s->avctx->sample_fmt = AV_SAMPLE_FMT_S32P;
+ else
+ s->avctx->sample_fmt = AV_SAMPLE_FMT_S32;
+ s->sample_shift = 32 - s->flac_stream_info.bps;
+ } else {
+ if (planar)
+ s->avctx->sample_fmt = AV_SAMPLE_FMT_S16P;
+ else
+ s->avctx->sample_fmt = AV_SAMPLE_FMT_S16;
+ s->sample_shift = 16 - s->flac_stream_info.bps;
+ }
+}
+
+static av_cold int flac_decode_init(AVCodecContext *avctx)
+{
+ enum FLACExtradataFormat format;
+ uint8_t *streaminfo;
+ int ret;
+ FLACContext *s = avctx->priv_data;
+ s->avctx = avctx;
+
+ /* for now, the raw FLAC header is allowed to be passed to the decoder as
+ frame data instead of extradata. */
+ if (!avctx->extradata)
+ return 0;
+
+ if (!ff_flac_is_extradata_valid(avctx, &format, &streaminfo))
+ return AVERROR_INVALIDDATA;
+
+ /* initialize based on the demuxer-supplied streamdata header */
+ ff_flac_parse_streaminfo(avctx, &s->flac_stream_info, streaminfo);
+ ret = allocate_buffers(s);
+ if (ret < 0)
+ return ret;
+ flac_set_bps(s);
+ ff_flacdsp_init(&s->dsp, avctx->sample_fmt,
+ s->flac_stream_info.channels, s->flac_stream_info.bps);
+ s->got_streaminfo = 1;
+
+ return 0;
+}
+
+static void dump_headers(AVCodecContext *avctx, FLACStreaminfo *s)
+{
+ av_log(avctx, AV_LOG_DEBUG, " Max Blocksize: %d\n", s->max_blocksize);
+ av_log(avctx, AV_LOG_DEBUG, " Max Framesize: %d\n", s->max_framesize);
+ av_log(avctx, AV_LOG_DEBUG, " Samplerate: %d\n", s->samplerate);
+ av_log(avctx, AV_LOG_DEBUG, " Channels: %d\n", s->channels);
+ av_log(avctx, AV_LOG_DEBUG, " Bits: %d\n", s->bps);
+}
+
+static int allocate_buffers(FLACContext *s)
+{
+ int buf_size;
+ int ret;
+
+ av_assert0(s->flac_stream_info.max_blocksize);
+
+ buf_size = av_samples_get_buffer_size(NULL, s->flac_stream_info.channels,
+ s->flac_stream_info.max_blocksize,
+ AV_SAMPLE_FMT_S32P, 0);
+ if (buf_size < 0)
+ return buf_size;
+
+ av_fast_malloc(&s->decoded_buffer, &s->decoded_buffer_size, buf_size);
+ if (!s->decoded_buffer)
+ return AVERROR(ENOMEM);
+
+ ret = av_samples_fill_arrays((uint8_t **)s->decoded, NULL,
+ s->decoded_buffer,
+ s->flac_stream_info.channels,
+ s->flac_stream_info.max_blocksize,
+ AV_SAMPLE_FMT_S32P, 0);
+ return ret < 0 ? ret : 0;
+}
+
+/**
+ * Parse the STREAMINFO from an inline header.
+ * @param s the flac decoding context
+ * @param buf input buffer, starting with the "fLaC" marker
+ * @param buf_size buffer size
+ * @return non-zero if metadata is invalid
+ */
+static int parse_streaminfo(FLACContext *s, const uint8_t *buf, int buf_size)
+{
+ int metadata_type, metadata_size, ret;
+
+ if (buf_size < FLAC_STREAMINFO_SIZE+8) {
+ /* need more data */
+ return 0;
+ }
+ flac_parse_block_header(&buf[4], NULL, &metadata_type, &metadata_size);
+ if (metadata_type != FLAC_METADATA_TYPE_STREAMINFO ||
+ metadata_size != FLAC_STREAMINFO_SIZE) {
+ return AVERROR_INVALIDDATA;
+ }
+ ff_flac_parse_streaminfo(s->avctx, &s->flac_stream_info, &buf[8]);
+ ret = allocate_buffers(s);
+ if (ret < 0)
+ return ret;
+ flac_set_bps(s);
+ ff_flacdsp_init(&s->dsp, s->avctx->sample_fmt,
+ s->flac_stream_info.channels, s->flac_stream_info.bps);
+ s->got_streaminfo = 1;
+
+ return 0;
+}
+
+/**
+ * Determine the size of an inline header.
+ * @param buf input buffer, starting with the "fLaC" marker
+ * @param buf_size buffer size
+ * @return number of bytes in the header, or 0 if more data is needed
+ */
+static int get_metadata_size(const uint8_t *buf, int buf_size)
+{
+ int metadata_last, metadata_size;
+ const uint8_t *buf_end = buf + buf_size;
+
+ buf += 4;
+ do {
+ if (buf_end - buf < 4)
+ return 0;
+ flac_parse_block_header(buf, &metadata_last, NULL, &metadata_size);
+ buf += 4;
+ if (buf_end - buf < metadata_size) {
+ /* need more data in order to read the complete header */
+ return 0;
+ }
+ buf += metadata_size;
+ } while (!metadata_last);
+
+ return buf_size - (buf_end - buf);
+}
+
+static int decode_residuals(FLACContext *s, int32_t *decoded, int pred_order)
+{
+ int i, tmp, partition, method_type, rice_order;
+ int rice_bits, rice_esc;
+ int samples;
+
+ method_type = get_bits(&s->gb, 2);
+ if (method_type > 1) {
+ av_log(s->avctx, AV_LOG_ERROR, "illegal residual coding method %d\n",
+ method_type);
+ return AVERROR_INVALIDDATA;
+ }
+
+ rice_order = get_bits(&s->gb, 4);
+
+ samples= s->blocksize >> rice_order;
+ if (samples << rice_order != s->blocksize) {
+ av_log(s->avctx, AV_LOG_ERROR, "invalid rice order: %i blocksize %i\n",
+ rice_order, s->blocksize);
+ return AVERROR_INVALIDDATA;
+ }
+
+ if (pred_order > samples) {
+ av_log(s->avctx, AV_LOG_ERROR, "invalid predictor order: %i > %i\n",
+ pred_order, samples);
+ return AVERROR_INVALIDDATA;
+ }
+
+ rice_bits = 4 + method_type;
+ rice_esc = (1 << rice_bits) - 1;
+
+ decoded += pred_order;
+ i= pred_order;
+ for (partition = 0; partition < (1 << rice_order); partition++) {
+ tmp = get_bits(&s->gb, rice_bits);
+ if (tmp == rice_esc) {
+ tmp = get_bits(&s->gb, 5);
+ for (; i < samples; i++)
+ *decoded++ = get_sbits_long(&s->gb, tmp);
+ } else {
+ for (; i < samples; i++) {
+ *decoded++ = get_sr_golomb_flac(&s->gb, tmp, INT_MAX, 0);
+ }
+ }
+ i= 0;
+ }
+
+ return 0;
+}
+
+static int decode_subframe_fixed(FLACContext *s, int32_t *decoded,
+ int pred_order, int bps)
+{
+ const int blocksize = s->blocksize;
+ int av_uninit(a), av_uninit(b), av_uninit(c), av_uninit(d), i;
+ int ret;
+
+ /* warm up samples */
+ for (i = 0; i < pred_order; i++) {
+ decoded[i] = get_sbits_long(&s->gb, bps);
+ }
+
+ if ((ret = decode_residuals(s, decoded, pred_order)) < 0)
+ return ret;
+
+ if (pred_order > 0)
+ a = decoded[pred_order-1];
+ if (pred_order > 1)
+ b = a - decoded[pred_order-2];
+ if (pred_order > 2)
+ c = b - decoded[pred_order-2] + decoded[pred_order-3];
+ if (pred_order > 3)
+ d = c - decoded[pred_order-2] + 2*decoded[pred_order-3] - decoded[pred_order-4];
+
+ switch (pred_order) {
+ case 0:
+ break;
+ case 1:
+ for (i = pred_order; i < blocksize; i++)
+ decoded[i] = a += decoded[i];
+ break;
+ case 2:
+ for (i = pred_order; i < blocksize; i++)
+ decoded[i] = a += b += decoded[i];
+ break;
+ case 3:
+ for (i = pred_order; i < blocksize; i++)
+ decoded[i] = a += b += c += decoded[i];
+ break;
+ case 4:
+ for (i = pred_order; i < blocksize; i++)
+ decoded[i] = a += b += c += d += decoded[i];
+ break;
+ default:
+ av_log(s->avctx, AV_LOG_ERROR, "illegal pred order %d\n", pred_order);
+ return AVERROR_INVALIDDATA;
+ }
+
+ return 0;
+}
+
+static void lpc_analyze_remodulate(int32_t *decoded, const int coeffs[32],
+ int order, int qlevel, int len, int bps)
+{
+ int i, j;
+ int ebps = 1 << (bps-1);
+ unsigned sigma = 0;
+
+ for (i = order; i < len; i++)
+ sigma |= decoded[i] + ebps;
+
+ if (sigma < 2*ebps)
+ return;
+
+ for (i = len - 1; i >= order; i--) {
+ int64_t p = 0;
+ for (j = 0; j < order; j++)
+ p += coeffs[j] * (int64_t)decoded[i-order+j];
+ decoded[i] -= p >> qlevel;
+ }
+ for (i = order; i < len; i++, decoded++) {
+ int32_t p = 0;
+ for (j = 0; j < order; j++)
+ p += coeffs[j] * (uint32_t)decoded[j];
+ decoded[j] += p >> qlevel;
+ }
+}
+
+static int decode_subframe_lpc(FLACContext *s, int32_t *decoded, int pred_order,
+ int bps)
+{
+ int i, ret;
+ int coeff_prec, qlevel;
+ int coeffs[32];
+
+ /* warm up samples */
+ for (i = 0; i < pred_order; i++) {
+ decoded[i] = get_sbits_long(&s->gb, bps);
+ }
+
+ coeff_prec = get_bits(&s->gb, 4) + 1;
+ if (coeff_prec == 16) {
+ av_log(s->avctx, AV_LOG_ERROR, "invalid coeff precision\n");
+ return AVERROR_INVALIDDATA;
+ }
+ qlevel = get_sbits(&s->gb, 5);
+ if (qlevel < 0) {
+ av_log(s->avctx, AV_LOG_ERROR, "qlevel %d not supported, maybe buggy stream\n",
+ qlevel);
+ return AVERROR_INVALIDDATA;
+ }
+
+ for (i = 0; i < pred_order; i++) {
+ coeffs[pred_order - i - 1] = get_sbits(&s->gb, coeff_prec);
+ }
+
+ if ((ret = decode_residuals(s, decoded, pred_order)) < 0)
+ return ret;
+
+ if ( ( s->buggy_lpc && s->flac_stream_info.bps <= 16)
+ || ( !s->buggy_lpc && bps <= 16
+ && bps + coeff_prec + av_log2(pred_order) <= 32)) {
+ s->dsp.lpc16(decoded, coeffs, pred_order, qlevel, s->blocksize);
+ } else {
+ s->dsp.lpc32(decoded, coeffs, pred_order, qlevel, s->blocksize);
+ if (s->flac_stream_info.bps <= 16)
+ lpc_analyze_remodulate(decoded, coeffs, pred_order, qlevel, s->blocksize, bps);
+ }
+
+ return 0;
+}
+
+static inline int decode_subframe(FLACContext *s, int channel)
+{
+ int32_t *decoded = s->decoded[channel];
+ int type, wasted = 0;
+ int bps = s->flac_stream_info.bps;
+ int i, tmp, ret;
+
+ if (channel == 0) {
+ if (s->ch_mode == FLAC_CHMODE_RIGHT_SIDE)
+ bps++;
+ } else {
+ if (s->ch_mode == FLAC_CHMODE_LEFT_SIDE || s->ch_mode == FLAC_CHMODE_MID_SIDE)
+ bps++;
+ }
+
+ if (get_bits1(&s->gb)) {
+ av_log(s->avctx, AV_LOG_ERROR, "invalid subframe padding\n");
+ return AVERROR_INVALIDDATA;
+ }
+ type = get_bits(&s->gb, 6);
+
+ if (get_bits1(&s->gb)) {
+ int left = get_bits_left(&s->gb);
+ if ( left <= 0 ||
+ (left < bps && !show_bits_long(&s->gb, left)) ||
+ !show_bits_long(&s->gb, bps)) {
+ av_log(s->avctx, AV_LOG_ERROR,
+ "Invalid number of wasted bits > available bits (%d) - left=%d\n",
+ bps, left);
+ return AVERROR_INVALIDDATA;
+ }
+ wasted = 1 + get_unary(&s->gb, 1, get_bits_left(&s->gb));
+ bps -= wasted;
+ }
+ if (bps > 32) {
+ avpriv_report_missing_feature(s->avctx, "Decorrelated bit depth > 32");
+ return AVERROR_PATCHWELCOME;
+ }
+
+//FIXME use av_log2 for types
+ if (type == 0) {
+ tmp = get_sbits_long(&s->gb, bps);
+ for (i = 0; i < s->blocksize; i++)
+ decoded[i] = tmp;
+ } else if (type == 1) {
+ for (i = 0; i < s->blocksize; i++)
+ decoded[i] = get_sbits_long(&s->gb, bps);
+ } else if ((type >= 8) && (type <= 12)) {
+ if ((ret = decode_subframe_fixed(s, decoded, type & ~0x8, bps)) < 0)
+ return ret;
+ } else if (type >= 32) {
+ if ((ret = decode_subframe_lpc(s, decoded, (type & ~0x20)+1, bps)) < 0)
+ return ret;
+ } else {
+ av_log(s->avctx, AV_LOG_ERROR, "invalid coding type\n");
+ return AVERROR_INVALIDDATA;
+ }
+
+ if (wasted) {
+ int i;
+ for (i = 0; i < s->blocksize; i++)
+ decoded[i] <<= wasted;
+ }
+
+ return 0;
+}
+
+static int decode_frame(FLACContext *s)
+{
+ int i, ret;
+ GetBitContext *gb = &s->gb;
+ FLACFrameInfo fi;
+
+ if ((ret = ff_flac_decode_frame_header(s->avctx, gb, &fi, 0)) < 0) {
+ av_log(s->avctx, AV_LOG_ERROR, "invalid frame header\n");
+ return ret;
+ }
+
+ if ( s->flac_stream_info.channels
+ && fi.channels != s->flac_stream_info.channels
+ && s->got_streaminfo) {
+ s->flac_stream_info.channels = s->avctx->channels = fi.channels;
+ ff_flac_set_channel_layout(s->avctx);
+ ret = allocate_buffers(s);
+ if (ret < 0)
+ return ret;
+ }
+ s->flac_stream_info.channels = s->avctx->channels = fi.channels;
+ if (!s->avctx->channel_layout)
+ ff_flac_set_channel_layout(s->avctx);
+ s->ch_mode = fi.ch_mode;
+
+ if (!s->flac_stream_info.bps && !fi.bps) {
+ av_log(s->avctx, AV_LOG_ERROR, "bps not found in STREAMINFO or frame header\n");
+ return AVERROR_INVALIDDATA;
+ }
+ if (!fi.bps) {
+ fi.bps = s->flac_stream_info.bps;
+ } else if (s->flac_stream_info.bps && fi.bps != s->flac_stream_info.bps) {
+ av_log(s->avctx, AV_LOG_ERROR, "switching bps mid-stream is not "
+ "supported\n");
+ return AVERROR_INVALIDDATA;
+ }
+
+ if (!s->flac_stream_info.bps) {
+ s->flac_stream_info.bps = s->avctx->bits_per_raw_sample = fi.bps;
+ flac_set_bps(s);
+ }
+
+ if (!s->flac_stream_info.max_blocksize)
+ s->flac_stream_info.max_blocksize = FLAC_MAX_BLOCKSIZE;
+ if (fi.blocksize > s->flac_stream_info.max_blocksize) {
+ av_log(s->avctx, AV_LOG_ERROR, "blocksize %d > %d\n", fi.blocksize,
+ s->flac_stream_info.max_blocksize);
+ return AVERROR_INVALIDDATA;
+ }
+ s->blocksize = fi.blocksize;
+
+ if (!s->flac_stream_info.samplerate && !fi.samplerate) {
+ av_log(s->avctx, AV_LOG_ERROR, "sample rate not found in STREAMINFO"
+ " or frame header\n");
+ return AVERROR_INVALIDDATA;
+ }
+ if (fi.samplerate == 0)
+ fi.samplerate = s->flac_stream_info.samplerate;
+ s->flac_stream_info.samplerate = s->avctx->sample_rate = fi.samplerate;
+
+ if (!s->got_streaminfo) {
+ ret = allocate_buffers(s);
+ if (ret < 0)
+ return ret;
+ s->got_streaminfo = 1;
+ dump_headers(s->avctx, &s->flac_stream_info);
+ }
+ ff_flacdsp_init(&s->dsp, s->avctx->sample_fmt,
+ s->flac_stream_info.channels, s->flac_stream_info.bps);
+
+// dump_headers(s->avctx, &s->flac_stream_info);
+
+ /* subframes */
+ for (i = 0; i < s->flac_stream_info.channels; i++) {
+ if ((ret = decode_subframe(s, i)) < 0)
+ return ret;
+ }
+
+ align_get_bits(gb);
+
+ /* frame footer */
+ skip_bits(gb, 16); /* data crc */
+
+ return 0;
+}
+
+static int flac_decode_frame(AVCodecContext *avctx, void *data,
+ int *got_frame_ptr, AVPacket *avpkt)
+{
+ AVFrame *frame = data;
+ ThreadFrame tframe = { .f = data };
+ const uint8_t *buf = avpkt->data;
+ int buf_size = avpkt->size;
+ FLACContext *s = avctx->priv_data;
+ int bytes_read = 0;
+ int ret;
+
+ *got_frame_ptr = 0;
+
+ if (s->flac_stream_info.max_framesize == 0) {
+ s->flac_stream_info.max_framesize =
+ ff_flac_get_max_frame_size(s->flac_stream_info.max_blocksize ? s->flac_stream_info.max_blocksize : FLAC_MAX_BLOCKSIZE,
+ FLAC_MAX_CHANNELS, 32);
+ }
+
+ if (buf_size > 5 && !memcmp(buf, "\177FLAC", 5)) {
+ av_log(s->avctx, AV_LOG_DEBUG, "skipping flac header packet 1\n");
+ return buf_size;
+ }
+
+ if (buf_size > 0 && (*buf & 0x7F) == FLAC_METADATA_TYPE_VORBIS_COMMENT) {
+ av_log(s->avctx, AV_LOG_DEBUG, "skipping vorbis comment\n");
+ return buf_size;
+ }
+
+ /* check that there is at least the smallest decodable amount of data.
+ this amount corresponds to the smallest valid FLAC frame possible.
+ FF F8 69 02 00 00 9A 00 00 34 46 */
+ if (buf_size < FLAC_MIN_FRAME_SIZE)
+ return buf_size;
+
+ /* check for inline header */
+ if (AV_RB32(buf) == MKBETAG('f','L','a','C')) {
+ if (!s->got_streaminfo && (ret = parse_streaminfo(s, buf, buf_size))) {
+ av_log(s->avctx, AV_LOG_ERROR, "invalid header\n");
+ return ret;
+ }
+ return get_metadata_size(buf, buf_size);
+ }
+
+ /* decode frame */
+ if ((ret = init_get_bits8(&s->gb, buf, buf_size)) < 0)
+ return ret;
+ if ((ret = decode_frame(s)) < 0) {
+ av_log(s->avctx, AV_LOG_ERROR, "decode_frame() failed\n");
+ return ret;
+ }
+ bytes_read = get_bits_count(&s->gb)/8;
+
+ if ((s->avctx->err_recognition & (AV_EF_CRCCHECK|AV_EF_COMPLIANT)) &&
+ av_crc(av_crc_get_table(AV_CRC_16_ANSI),
+ 0, buf, bytes_read)) {
+ av_log(s->avctx, AV_LOG_ERROR, "CRC error at PTS %"PRId64"\n", avpkt->pts);
+ if (s->avctx->err_recognition & AV_EF_EXPLODE)
+ return AVERROR_INVALIDDATA;
+ }
+
+ /* get output buffer */
+ frame->nb_samples = s->blocksize;
+ if ((ret = ff_thread_get_buffer(avctx, &tframe, 0)) < 0)
+ return ret;
+
+ s->dsp.decorrelate[s->ch_mode](frame->data, s->decoded,
+ s->flac_stream_info.channels,
+ s->blocksize, s->sample_shift);
+
+ if (bytes_read > buf_size) {
+ av_log(s->avctx, AV_LOG_ERROR, "overread: %d\n", bytes_read - buf_size);
+ return AVERROR_INVALIDDATA;
+ }
+ if (bytes_read < buf_size) {
+ av_log(s->avctx, AV_LOG_DEBUG, "underread: %d orig size: %d\n",
+ buf_size - bytes_read, buf_size);
+ }
+
+ *got_frame_ptr = 1;
+
+ return bytes_read;
+}
+
+#if HAVE_THREADS
+static int init_thread_copy(AVCodecContext *avctx)
+{
+ FLACContext *s = avctx->priv_data;
+ s->decoded_buffer = NULL;
+ s->decoded_buffer_size = 0;
+ s->avctx = avctx;
+ if (s->flac_stream_info.max_blocksize)
+ return allocate_buffers(s);
+ return 0;
+}
+#endif
+
+static av_cold int flac_decode_close(AVCodecContext *avctx)
+{
+ FLACContext *s = avctx->priv_data;
+
+ av_freep(&s->decoded_buffer);
+
+ return 0;
+}
+
+static const AVOption options[] = {
+{ "use_buggy_lpc", "emulate old buggy lavc behavior", offsetof(FLACContext, buggy_lpc), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM },
+{ NULL },
+};
+
+static const AVClass flac_decoder_class = {
+ "FLAC decoder",
+ av_default_item_name,
+ options,
+ LIBAVUTIL_VERSION_INT,
+};
+
+AVCodec ff_flac_decoder = {
+ .name = "flac",
+ .long_name = NULL_IF_CONFIG_SMALL("FLAC (Free Lossless Audio Codec)"),
+ .type = AVMEDIA_TYPE_AUDIO,
+ .id = AV_CODEC_ID_FLAC,
+ .priv_data_size = sizeof(FLACContext),
+ .init = flac_decode_init,
+ .close = flac_decode_close,
+ .decode = flac_decode_frame,
+ .init_thread_copy = ONLY_IF_THREADS_ENABLED(init_thread_copy),
+ .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
+ .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16,
+ AV_SAMPLE_FMT_S16P,
+ AV_SAMPLE_FMT_S32,
+ AV_SAMPLE_FMT_S32P,
+ AV_SAMPLE_FMT_NONE },
+ .priv_class = &flac_decoder_class,
+};
diff --git a/media/ffvpx/libavcodec/flacdsp.c b/media/ffvpx/libavcodec/flacdsp.c
new file mode 100644
index 000000000..30b66484e
--- /dev/null
+++ b/media/ffvpx/libavcodec/flacdsp.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2012 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/samplefmt.h"
+#include "flacdsp.h"
+#include "config.h"
+
+#define SAMPLE_SIZE 16
+#define PLANAR 0
+#include "flacdsp_template.c"
+#include "flacdsp_lpc_template.c"
+
+#undef PLANAR
+#define PLANAR 1
+#include "flacdsp_template.c"
+
+#undef SAMPLE_SIZE
+#undef PLANAR
+#define SAMPLE_SIZE 32
+#define PLANAR 0
+#include "flacdsp_template.c"
+#include "flacdsp_lpc_template.c"
+
+#undef PLANAR
+#define PLANAR 1
+#include "flacdsp_template.c"
+
+static void flac_lpc_16_c(int32_t *decoded, const int coeffs[32],
+ int pred_order, int qlevel, int len)
+{
+ int i, j;
+
+ for (i = pred_order; i < len - 1; i += 2, decoded += 2) {
+ int c = coeffs[0];
+ int d = decoded[0];
+ int s0 = 0, s1 = 0;
+ for (j = 1; j < pred_order; j++) {
+ s0 += c*d;
+ d = decoded[j];
+ s1 += c*d;
+ c = coeffs[j];
+ }
+ s0 += c*d;
+ d = decoded[j] += s0 >> qlevel;
+ s1 += c*d;
+ decoded[j + 1] += s1 >> qlevel;
+ }
+ if (i < len) {
+ int sum = 0;
+ for (j = 0; j < pred_order; j++)
+ sum += coeffs[j] * decoded[j];
+ decoded[j] += sum >> qlevel;
+ }
+}
+
+static void flac_lpc_32_c(int32_t *decoded, const int coeffs[32],
+ int pred_order, int qlevel, int len)
+{
+ int i, j;
+
+ for (i = pred_order; i < len; i++, decoded++) {
+ int64_t sum = 0;
+ for (j = 0; j < pred_order; j++)
+ sum += (int64_t)coeffs[j] * decoded[j];
+ decoded[j] += sum >> qlevel;
+ }
+
+}
+
+av_cold void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt, int channels,
+ int bps)
+{
+ c->lpc16 = flac_lpc_16_c;
+ c->lpc32 = flac_lpc_32_c;
+ c->lpc16_encode = flac_lpc_encode_c_16;
+ c->lpc32_encode = flac_lpc_encode_c_32;
+
+ switch (fmt) {
+ case AV_SAMPLE_FMT_S32:
+ c->decorrelate[0] = flac_decorrelate_indep_c_32;
+ c->decorrelate[1] = flac_decorrelate_ls_c_32;
+ c->decorrelate[2] = flac_decorrelate_rs_c_32;
+ c->decorrelate[3] = flac_decorrelate_ms_c_32;
+ break;
+
+ case AV_SAMPLE_FMT_S32P:
+ c->decorrelate[0] = flac_decorrelate_indep_c_32p;
+ c->decorrelate[1] = flac_decorrelate_ls_c_32p;
+ c->decorrelate[2] = flac_decorrelate_rs_c_32p;
+ c->decorrelate[3] = flac_decorrelate_ms_c_32p;
+ break;
+
+ case AV_SAMPLE_FMT_S16:
+ c->decorrelate[0] = flac_decorrelate_indep_c_16;
+ c->decorrelate[1] = flac_decorrelate_ls_c_16;
+ c->decorrelate[2] = flac_decorrelate_rs_c_16;
+ c->decorrelate[3] = flac_decorrelate_ms_c_16;
+ break;
+
+ case AV_SAMPLE_FMT_S16P:
+ c->decorrelate[0] = flac_decorrelate_indep_c_16p;
+ c->decorrelate[1] = flac_decorrelate_ls_c_16p;
+ c->decorrelate[2] = flac_decorrelate_rs_c_16p;
+ c->decorrelate[3] = flac_decorrelate_ms_c_16p;
+ break;
+ }
+
+ if (ARCH_ARM)
+ ff_flacdsp_init_arm(c, fmt, channels, bps);
+ if (ARCH_X86)
+ ff_flacdsp_init_x86(c, fmt, channels, bps);
+}
diff --git a/media/ffvpx/libavcodec/flacdsp.h b/media/ffvpx/libavcodec/flacdsp.h
new file mode 100644
index 000000000..f5cbd9472
--- /dev/null
+++ b/media/ffvpx/libavcodec/flacdsp.h
@@ -0,0 +1,42 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_FLACDSP_H
+#define AVCODEC_FLACDSP_H
+
+#include <stdint.h>
+#include "libavutil/samplefmt.h"
+
+typedef struct FLACDSPContext {
+ void (*decorrelate[4])(uint8_t **out, int32_t **in, int channels,
+ int len, int shift);
+ void (*lpc16)(int32_t *samples, const int coeffs[32], int order,
+ int qlevel, int len);
+ void (*lpc32)(int32_t *samples, const int coeffs[32], int order,
+ int qlevel, int len);
+ void (*lpc16_encode)(int32_t *res, const int32_t *smp, int len, int order,
+ const int32_t coefs[32], int shift);
+ void (*lpc32_encode)(int32_t *res, const int32_t *smp, int len, int order,
+ const int32_t coefs[32], int shift);
+} FLACDSPContext;
+
+void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt, int channels, int bps);
+void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt, int channels, int bps);
+void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int channels, int bps);
+
+#endif /* AVCODEC_FLACDSP_H */
diff --git a/media/ffvpx/libavcodec/flacdsp_lpc_template.c b/media/ffvpx/libavcodec/flacdsp_lpc_template.c
new file mode 100644
index 000000000..5d532e067
--- /dev/null
+++ b/media/ffvpx/libavcodec/flacdsp_lpc_template.c
@@ -0,0 +1,159 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+#include "libavutil/avutil.h"
+#include "mathops.h"
+
+#undef FUNC
+#undef sum_type
+#undef MUL
+#undef CLIP
+#undef FSUF
+
+#define FUNC(n) AV_JOIN(n ## _, SAMPLE_SIZE)
+
+#if SAMPLE_SIZE == 32
+# define sum_type int64_t
+# define MUL(a, b) MUL64(a, b)
+# define CLIP(x) av_clipl_int32(x)
+#else
+# define sum_type int32_t
+# define MUL(a, b) ((a) * (b))
+# define CLIP(x) (x)
+#endif
+
+#define LPC1(x) { \
+ int c = coefs[(x)-1]; \
+ p0 += MUL(c, s); \
+ s = smp[i-(x)+1]; \
+ p1 += MUL(c, s); \
+}
+
+static av_always_inline void FUNC(lpc_encode_unrolled)(int32_t *res,
+ const int32_t *smp, int len, int order,
+ const int32_t *coefs, int shift, int big)
+{
+ int i;
+ for (i = order; i < len; i += 2) {
+ int s = smp[i-order];
+ sum_type p0 = 0, p1 = 0;
+ if (big) {
+ switch (order) {
+ case 32: LPC1(32)
+ case 31: LPC1(31)
+ case 30: LPC1(30)
+ case 29: LPC1(29)
+ case 28: LPC1(28)
+ case 27: LPC1(27)
+ case 26: LPC1(26)
+ case 25: LPC1(25)
+ case 24: LPC1(24)
+ case 23: LPC1(23)
+ case 22: LPC1(22)
+ case 21: LPC1(21)
+ case 20: LPC1(20)
+ case 19: LPC1(19)
+ case 18: LPC1(18)
+ case 17: LPC1(17)
+ case 16: LPC1(16)
+ case 15: LPC1(15)
+ case 14: LPC1(14)
+ case 13: LPC1(13)
+ case 12: LPC1(12)
+ case 11: LPC1(11)
+ case 10: LPC1(10)
+ case 9: LPC1( 9)
+ LPC1( 8)
+ LPC1( 7)
+ LPC1( 6)
+ LPC1( 5)
+ LPC1( 4)
+ LPC1( 3)
+ LPC1( 2)
+ LPC1( 1)
+ }
+ } else {
+ switch (order) {
+ case 8: LPC1( 8)
+ case 7: LPC1( 7)
+ case 6: LPC1( 6)
+ case 5: LPC1( 5)
+ case 4: LPC1( 4)
+ case 3: LPC1( 3)
+ case 2: LPC1( 2)
+ case 1: LPC1( 1)
+ }
+ }
+ res[i ] = smp[i ] - CLIP(p0 >> shift);
+ res[i+1] = smp[i+1] - CLIP(p1 >> shift);
+ }
+}
+
+static void FUNC(flac_lpc_encode_c)(int32_t *res, const int32_t *smp, int len,
+ int order, const int32_t *coefs, int shift)
+{
+ int i;
+ for (i = 0; i < order; i++)
+ res[i] = smp[i];
+#if CONFIG_SMALL
+ for (i = order; i < len; i += 2) {
+ int j;
+ int s = smp[i];
+ sum_type p0 = 0, p1 = 0;
+ for (j = 0; j < order; j++) {
+ int c = coefs[j];
+ p1 += MUL(c, s);
+ s = smp[i-j-1];
+ p0 += MUL(c, s);
+ }
+ res[i ] = smp[i ] - CLIP(p0 >> shift);
+ res[i+1] = smp[i+1] - CLIP(p1 >> shift);
+ }
+#else
+ switch (order) {
+ case 1: FUNC(lpc_encode_unrolled)(res, smp, len, 1, coefs, shift, 0); break;
+ case 2: FUNC(lpc_encode_unrolled)(res, smp, len, 2, coefs, shift, 0); break;
+ case 3: FUNC(lpc_encode_unrolled)(res, smp, len, 3, coefs, shift, 0); break;
+ case 4: FUNC(lpc_encode_unrolled)(res, smp, len, 4, coefs, shift, 0); break;
+ case 5: FUNC(lpc_encode_unrolled)(res, smp, len, 5, coefs, shift, 0); break;
+ case 6: FUNC(lpc_encode_unrolled)(res, smp, len, 6, coefs, shift, 0); break;
+ case 7: FUNC(lpc_encode_unrolled)(res, smp, len, 7, coefs, shift, 0); break;
+ case 8: FUNC(lpc_encode_unrolled)(res, smp, len, 8, coefs, shift, 0); break;
+ default: FUNC(lpc_encode_unrolled)(res, smp, len, order, coefs, shift, 1); break;
+ }
+#endif
+}
+
+/* Comment for clarity/de-obfuscation.
+ *
+ * for (int i = order; i < len; i++) {
+ * int32_t p = 0;
+ * for (int j = 0; j < order; j++) {
+ * int c = coefs[j];
+ * int s = smp[(i-1)-j];
+ * p += c*s;
+ * }
+ * res[i] = smp[i] - (p >> shift);
+ * }
+ *
+ * The CONFIG_SMALL code above simplifies to this, in the case of SAMPLE_SIZE
+ * not being equal to 32 (at the present time that means for 16-bit audio). The
+ * code above does 2 samples per iteration. Commit bfdd5bc (made all the way
+ * back in 2007) says that way is faster.
+ */
diff --git a/media/ffvpx/libavcodec/flacdsp_template.c b/media/ffvpx/libavcodec/flacdsp_template.c
new file mode 100644
index 000000000..62c0a15ff
--- /dev/null
+++ b/media/ffvpx/libavcodec/flacdsp_template.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2012 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+#include "libavutil/avutil.h"
+
+#undef FUNC
+#undef FSUF
+#undef sample
+#undef sample_type
+#undef OUT
+#undef S
+
+#if SAMPLE_SIZE == 32
+# define sample_type int32_t
+#else
+# define sample_type int16_t
+#endif
+
+#if PLANAR
+# define FSUF AV_JOIN(SAMPLE_SIZE, p)
+# define sample sample_type *
+# define OUT(n) n
+# define S(s, c, i) (s[c][i])
+#else
+# define FSUF SAMPLE_SIZE
+# define sample sample_type
+# define OUT(n) n[0]
+# define S(s, c, i) (*s++)
+#endif
+
+#define FUNC(n) AV_JOIN(n ## _, FSUF)
+
+static void FUNC(flac_decorrelate_indep_c)(uint8_t **out, int32_t **in,
+ int channels, int len, int shift)
+{
+ sample *samples = (sample *) OUT(out);
+ int i, j;
+
+ for (j = 0; j < len; j++)
+ for (i = 0; i < channels; i++)
+ S(samples, i, j) = in[i][j] << shift;
+}
+
+static void FUNC(flac_decorrelate_ls_c)(uint8_t **out, int32_t **in,
+ int channels, int len, int shift)
+{
+ sample *samples = (sample *) OUT(out);
+ int i;
+
+ for (i = 0; i < len; i++) {
+ int a = in[0][i];
+ int b = in[1][i];
+ S(samples, 0, i) = a << shift;
+ S(samples, 1, i) = (a - b) << shift;
+ }
+}
+
+static void FUNC(flac_decorrelate_rs_c)(uint8_t **out, int32_t **in,
+ int channels, int len, int shift)
+{
+ sample *samples = (sample *) OUT(out);
+ int i;
+
+ for (i = 0; i < len; i++) {
+ int a = in[0][i];
+ int b = in[1][i];
+ S(samples, 0, i) = (a + b) << shift;
+ S(samples, 1, i) = b << shift;
+ }
+}
+
+static void FUNC(flac_decorrelate_ms_c)(uint8_t **out, int32_t **in,
+ int channels, int len, int shift)
+{
+ sample *samples = (sample *) OUT(out);
+ int i;
+
+ for (i = 0; i < len; i++) {
+ int a = in[0][i];
+ int b = in[1][i];
+ a -= b >> 1;
+ S(samples, 0, i) = (a + b) << shift;
+ S(samples, 1, i) = a << shift;
+ }
+}
diff --git a/media/ffvpx/libavcodec/frame_thread_encoder.h b/media/ffvpx/libavcodec/frame_thread_encoder.h
new file mode 100644
index 000000000..1f79553f2
--- /dev/null
+++ b/media/ffvpx/libavcodec/frame_thread_encoder.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2012 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_FRAME_THREAD_ENCODER_H
+#define AVCODEC_FRAME_THREAD_ENCODER_H
+
+#include "avcodec.h"
+
+/* Set up frame-level threaded encoding on avctx. 'options' presumably seeds
+ * the per-thread encoder contexts — TODO(review): confirm against the
+ * implementation in frame_thread_encoder.c. */
+int ff_frame_thread_encoder_init(AVCodecContext *avctx, AVDictionary *options);
+/* Tear down all state created by ff_frame_thread_encoder_init(). */
+void ff_frame_thread_encoder_free(AVCodecContext *avctx);
+/* Encode one video frame through the frame-thread pipeline; *got_packet_ptr
+ * signals whether 'pkt' holds finished output. */
+int ff_thread_video_encode_frame(AVCodecContext *avctx, AVPacket *pkt, const AVFrame *frame, int *got_packet_ptr);
+
+#endif /* AVCODEC_FRAME_THREAD_ENCODER_H */
diff --git a/media/ffvpx/libavcodec/get_bits.h b/media/ffvpx/libavcodec/get_bits.h
new file mode 100644
index 000000000..0f183e035
--- /dev/null
+++ b/media/ffvpx/libavcodec/get_bits.h
@@ -0,0 +1,587 @@
+/*
+ * copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * bitstream reader API header.
+ */
+
+#ifndef AVCODEC_GET_BITS_H
+#define AVCODEC_GET_BITS_H
+
+#include <stdint.h>
+
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/log.h"
+#include "libavutil/avassert.h"
+#include "mathops.h"
+#include "vlc.h"
+
+/*
+ * Safe bitstream reading:
+ * optionally, the get_bits API can check to ensure that we
+ * don't read past input buffer boundaries. This is protected
+ * with CONFIG_SAFE_BITSTREAM_READER at the global level, and
+ * then below that with UNCHECKED_BITSTREAM_READER at the per-
+ * decoder level. This means that decoders that check internally
+ * can "#define UNCHECKED_BITSTREAM_READER 1" to disable
+ * overread checks.
+ * Boundary checking causes a minor performance penalty so for
+ * applications that won't want/need this, it can be disabled
+ * globally using "#define CONFIG_SAFE_BITSTREAM_READER 0".
+ */
+#ifndef UNCHECKED_BITSTREAM_READER
+#define UNCHECKED_BITSTREAM_READER !CONFIG_SAFE_BITSTREAM_READER
+#endif
+
+typedef struct GetBitContext {
+    const uint8_t *buffer, *buffer_end;  ///< bitstream data and one-past-the-end pointer
+    int index;                           ///< current read position, in bits from buffer start
+    int size_in_bits;                    ///< usable size of the bitstream, in bits
+    int size_in_bits_plus8;              ///< size_in_bits + 8; clamp bound for the checked reader
+} GetBitContext;
+
+/* Bitstream reader API docs:
+ * name
+ * arbitrary name which is used as prefix for the internal variables
+ *
+ * gb
+ * getbitcontext
+ *
+ * OPEN_READER(name, gb)
+ * load gb into local variables
+ *
+ * CLOSE_READER(name, gb)
+ * store local vars in gb
+ *
+ * UPDATE_CACHE(name, gb)
+ * Refill the internal cache from the bitstream.
+ * After this call at least MIN_CACHE_BITS will be available.
+ *
+ * GET_CACHE(name, gb)
+ * Will output the contents of the internal cache,
+ * next bit is MSB of 32 or 64 bits (FIXME 64 bits).
+ *
+ * SHOW_UBITS(name, gb, num)
+ * Will return the next num bits.
+ *
+ * SHOW_SBITS(name, gb, num)
+ * Will return the next num bits and do sign extension.
+ *
+ * SKIP_BITS(name, gb, num)
+ * Will skip over the next num bits.
+ * Note, this is equivalent to SKIP_CACHE; SKIP_COUNTER.
+ *
+ * SKIP_CACHE(name, gb, num)
+ * Will remove the next num bits from the cache (note SKIP_COUNTER
+ * MUST be called before UPDATE_CACHE / CLOSE_READER).
+ *
+ * SKIP_COUNTER(name, gb, num)
+ * Will increment the internal bit counter (see SKIP_CACHE & SKIP_BITS).
+ *
+ * LAST_SKIP_BITS(name, gb, num)
+ * Like SKIP_BITS, to be used if next call is UPDATE_CACHE or CLOSE_READER.
+ *
+ * BITS_LEFT(name, gb)
+ * Return the number of bits left
+ *
+ * For examples see get_bits, show_bits, skip_bits, get_vlc.
+ */
+
+#ifdef LONG_BITSTREAM_READER
+# define MIN_CACHE_BITS 32
+#else
+# define MIN_CACHE_BITS 25
+#endif
+
+#define OPEN_READER_NOSIZE(name, gb) \
+ unsigned int name ## _index = (gb)->index; \
+ unsigned int av_unused name ## _cache
+
+#if UNCHECKED_BITSTREAM_READER
+#define OPEN_READER(name, gb) OPEN_READER_NOSIZE(name, gb)
+
+#define BITS_AVAILABLE(name, gb) 1
+#else
+#define OPEN_READER(name, gb) \
+ OPEN_READER_NOSIZE(name, gb); \
+ unsigned int name ## _size_plus8 = (gb)->size_in_bits_plus8
+
+#define BITS_AVAILABLE(name, gb) name ## _index < name ## _size_plus8
+#endif
+
+#define CLOSE_READER(name, gb) (gb)->index = name ## _index
+
+# ifdef LONG_BITSTREAM_READER
+
+# define UPDATE_CACHE_LE(name, gb) name ## _cache = \
+ AV_RL64((gb)->buffer + (name ## _index >> 3)) >> (name ## _index & 7)
+
+# define UPDATE_CACHE_BE(name, gb) name ## _cache = \
+ AV_RB64((gb)->buffer + (name ## _index >> 3)) >> (32 - (name ## _index & 7))
+
+#else
+
+# define UPDATE_CACHE_LE(name, gb) name ## _cache = \
+ AV_RL32((gb)->buffer + (name ## _index >> 3)) >> (name ## _index & 7)
+
+# define UPDATE_CACHE_BE(name, gb) name ## _cache = \
+ AV_RB32((gb)->buffer + (name ## _index >> 3)) << (name ## _index & 7)
+
+#endif
+
+
+#ifdef BITSTREAM_READER_LE
+
+# define UPDATE_CACHE(name, gb) UPDATE_CACHE_LE(name, gb)
+
+# define SKIP_CACHE(name, gb, num) name ## _cache >>= (num)
+
+#else
+
+# define UPDATE_CACHE(name, gb) UPDATE_CACHE_BE(name, gb)
+
+# define SKIP_CACHE(name, gb, num) name ## _cache <<= (num)
+
+#endif
+
+#if UNCHECKED_BITSTREAM_READER
+# define SKIP_COUNTER(name, gb, num) name ## _index += (num)
+#else
+# define SKIP_COUNTER(name, gb, num) \
+ name ## _index = FFMIN(name ## _size_plus8, name ## _index + (num))
+#endif
+
+#define BITS_LEFT(name, gb) ((int)((gb)->size_in_bits - name ## _index))
+
+#define SKIP_BITS(name, gb, num) \
+ do { \
+ SKIP_CACHE(name, gb, num); \
+ SKIP_COUNTER(name, gb, num); \
+ } while (0)
+
+#define LAST_SKIP_BITS(name, gb, num) SKIP_COUNTER(name, gb, num)
+
+#define SHOW_UBITS_LE(name, gb, num) zero_extend(name ## _cache, num)
+#define SHOW_SBITS_LE(name, gb, num) sign_extend(name ## _cache, num)
+
+#define SHOW_UBITS_BE(name, gb, num) NEG_USR32(name ## _cache, num)
+#define SHOW_SBITS_BE(name, gb, num) NEG_SSR32(name ## _cache, num)
+
+#ifdef BITSTREAM_READER_LE
+# define SHOW_UBITS(name, gb, num) SHOW_UBITS_LE(name, gb, num)
+# define SHOW_SBITS(name, gb, num) SHOW_SBITS_LE(name, gb, num)
+#else
+# define SHOW_UBITS(name, gb, num) SHOW_UBITS_BE(name, gb, num)
+# define SHOW_SBITS(name, gb, num) SHOW_SBITS_BE(name, gb, num)
+#endif
+
+#define GET_CACHE(name, gb) ((uint32_t) name ## _cache)
+
+/* Number of bits consumed from the bitstream so far. */
+static inline int get_bits_count(const GetBitContext *s)
+{
+    return s->index;
+}
+
+/* Skip n bits; n may exceed the cache size or be negative (rewind). */
+static inline void skip_bits_long(GetBitContext *s, int n)
+{
+#if UNCHECKED_BITSTREAM_READER
+    s->index += n;
+#else
+    /* clamp so index stays within [0, size_in_bits_plus8] */
+    s->index += av_clip(n, -s->index, s->size_in_bits_plus8 - s->index);
+#endif
+}
+
+/**
+ * Read MPEG-1 dc-style VLC (sign bit + mantissa with no MSB).
+ * if MSB not set it is negative
+ * @param n length in bits
+ */
+static inline int get_xbits(GetBitContext *s, int n)
+{
+    register int sign;
+    register int32_t cache;
+    OPEN_READER(re, s);
+    av_assert2(n>0 && n<=25);
+    UPDATE_CACHE(re, s);
+    cache = GET_CACHE(re, s);
+    sign = ~cache >> 31;  /* 0 if the MSB is set, -1 otherwise */
+    LAST_SKIP_BITS(re, s, n);
+    CLOSE_READER(re, s);
+    /* extract the top n bits and conditionally negate (xor/subtract trick) */
+    return (NEG_USR32(sign ^ cache, n) ^ sign) - sign;
+}
+
+/* Read 1-25 bits and sign-extend the result. */
+static inline int get_sbits(GetBitContext *s, int n)
+{
+    register int tmp;
+    OPEN_READER(re, s);
+    av_assert2(n>0 && n<=25);
+    UPDATE_CACHE(re, s);
+    tmp = SHOW_SBITS(re, s, n);
+    LAST_SKIP_BITS(re, s, n);
+    CLOSE_READER(re, s);
+    return tmp;
+}
+
+/**
+ * Read 1-25 bits.
+ */
+static inline unsigned int get_bits(GetBitContext *s, int n)
+{
+    register int tmp;
+    OPEN_READER(re, s);
+    av_assert2(n>0 && n<=25);
+    UPDATE_CACHE(re, s);
+    tmp = SHOW_UBITS(re, s, n);  /* n <= 25, so the int value is non-negative */
+    LAST_SKIP_BITS(re, s, n);
+    CLOSE_READER(re, s);
+    return tmp;
+}
+
+/**
+ * Read 0-25 bits.
+ * Unlike get_bits(), n == 0 is permitted and yields 0.
+ */
+static av_always_inline int get_bitsz(GetBitContext *s, int n)
+{
+    return n ? get_bits(s, n) : 0;
+}
+
+/* Read 1-25 bits from a little-endian bitstream, regardless of the
+ * compile-time BITSTREAM_READER_LE setting. */
+static inline unsigned int get_bits_le(GetBitContext *s, int n)
+{
+    register int tmp;
+    OPEN_READER(re, s);
+    av_assert2(n>0 && n<=25);
+    UPDATE_CACHE_LE(re, s);
+    tmp = SHOW_UBITS_LE(re, s, n);
+    LAST_SKIP_BITS(re, s, n);
+    CLOSE_READER(re, s);
+    return tmp;
+}
+
+/**
+ * Show 1-25 bits.
+ * The read position is not advanced (no skip / close).
+ */
+static inline unsigned int show_bits(GetBitContext *s, int n)
+{
+    register int tmp;
+    OPEN_READER_NOSIZE(re, s);
+    av_assert2(n>0 && n<=25);
+    UPDATE_CACHE(re, s);
+    tmp = SHOW_UBITS(re, s, n);
+    return tmp;
+}
+
+/* Skip n bits without reading them (clamped by the checked reader). */
+static inline void skip_bits(GetBitContext *s, int n)
+{
+    OPEN_READER(re, s);
+    LAST_SKIP_BITS(re, s, n);
+    CLOSE_READER(re, s);
+}
+
+/* Read a single bit: fast path that addresses the buffer directly instead
+ * of going through the cache-based reader macros. */
+static inline unsigned int get_bits1(GetBitContext *s)
+{
+    unsigned int index = s->index;
+    uint8_t result = s->buffer[index >> 3];
+#ifdef BITSTREAM_READER_LE
+    result >>= index & 7;
+    result &= 1;
+#else
+    result <<= index & 7;
+    result >>= 8 - 1;
+#endif
+#if !UNCHECKED_BITSTREAM_READER
+    /* checked reader: only advance while inside the padded bound */
+    if (s->index < s->size_in_bits_plus8)
+#endif
+    index++;
+    s->index = index;
+
+    return result;
+}
+
+/* Peek at the next bit without advancing the read position. */
+static inline unsigned int show_bits1(GetBitContext *s)
+{
+    return show_bits(s, 1);
+}
+
+/* Skip a single bit. */
+static inline void skip_bits1(GetBitContext *s)
+{
+    skip_bits(s, 1);
+}
+
+/**
+ * Read 0-32 bits.
+ */
+static inline unsigned int get_bits_long(GetBitContext *s, int n)
+{
+    if (!n) {
+        return 0;
+    } else if (n <= MIN_CACHE_BITS) {
+        return get_bits(s, n);
+    } else {
+        /* wider than the cache: split into two reads of at most 16 bits */
+#ifdef BITSTREAM_READER_LE
+        unsigned ret = get_bits(s, 16);
+        return ret | (get_bits(s, n - 16) << 16);
+#else
+        unsigned ret = get_bits(s, 16) << (n - 16);
+        return ret | get_bits(s, n - 16);
+#endif
+    }
+}
+
+/**
+ * Read 0-64 bits.
+ */
+static inline uint64_t get_bits64(GetBitContext *s, int n)
+{
+    if (n <= 32) {
+        return get_bits_long(s, n);
+    } else {
+        /* split into two halves, ordered to match the stream's endianness */
+#ifdef BITSTREAM_READER_LE
+        uint64_t ret = get_bits_long(s, 32);
+        return ret | (uint64_t) get_bits_long(s, n - 32) << 32;
+#else
+        uint64_t ret = (uint64_t) get_bits_long(s, n - 32) << 32;
+        return ret | get_bits_long(s, 32);
+#endif
+    }
+}
+
+/**
+ * Read 0-32 bits as a signed integer.
+ * NOTE(review): behaviour for n == 0 depends on sign_extend(); confirm
+ * callers never pass 0 if sign_extend(x, 0) is not well defined.
+ */
+static inline int get_sbits_long(GetBitContext *s, int n)
+{
+    return sign_extend(get_bits_long(s, n), n);
+}
+
+/**
+ * Show 0-32 bits.
+ */
+static inline unsigned int show_bits_long(GetBitContext *s, int n)
+{
+    if (n <= MIN_CACHE_BITS) {
+        return show_bits(s, n);
+    } else {
+        /* wide peek: read from a copy so the real position is unchanged */
+        GetBitContext gb = *s;
+        return get_bits_long(&gb, n);
+    }
+}
+
+/* Consume one bit the syntax requires to be 1; logs a notice (but does not
+ * fail) when the marker is missing. Returns the bit that was read. */
+static inline int check_marker(void *logctx, GetBitContext *s, const char *msg)
+{
+    int bit = get_bits1(s);
+    if (!bit)
+        av_log(logctx, AV_LOG_INFO, "Marker bit missing at %d of %d %s\n",
+               get_bits_count(s) - 1, s->size_in_bits, msg);
+
+    return bit;
+}
+
+/**
+ * Initialize GetBitContext.
+ * @param buffer bitstream buffer, must be AV_INPUT_BUFFER_PADDING_SIZE bytes
+ * larger than the actual read bits because some optimized bitstream
+ * readers read 32 or 64 bit at once and could read over the end
+ * @param bit_size the size of the buffer in bits
+ * @return 0 on success, AVERROR_INVALIDDATA if the buffer_size would overflow.
+ */
+static inline int init_get_bits(GetBitContext *s, const uint8_t *buffer,
+                                int bit_size)
+{
+    int buffer_size;
+    int ret = 0;
+
+    if (bit_size >= INT_MAX - 7 || bit_size < 0 || !buffer) {
+        /* reject sizes that would overflow the +7/+8 arithmetic below;
+         * the context is reset to an empty buffer and an error returned */
+        bit_size = 0;
+        buffer = NULL;
+        ret = AVERROR_INVALIDDATA;
+    }
+
+    buffer_size = (bit_size + 7) >> 3;  /* round up to whole bytes */
+
+    s->buffer = buffer;
+    s->size_in_bits = bit_size;
+    s->size_in_bits_plus8 = bit_size + 8;
+    s->buffer_end = buffer + buffer_size;
+    s->index = 0;
+
+    return ret;
+}
+
+/**
+ * Initialize GetBitContext.
+ * @param buffer bitstream buffer, must be AV_INPUT_BUFFER_PADDING_SIZE bytes
+ *        larger than the actual read bits because some optimized bitstream
+ *        readers read 32 or 64 bit at once and could read over the end
+ * @param byte_size the size of the buffer in bytes
+ * @return 0 on success, AVERROR_INVALIDDATA if the buffer_size would overflow.
+ */
+static inline int init_get_bits8(GetBitContext *s, const uint8_t *buffer,
+                                 int byte_size)
+{
+    if (byte_size > INT_MAX / 8 || byte_size < 0)
+        byte_size = -1;  /* forces the invalid-size path in init_get_bits() */
+    return init_get_bits(s, buffer, byte_size * 8);
+}
+
+/* Advance to the next byte boundary and return a pointer to that byte. */
+static inline const uint8_t *align_get_bits(GetBitContext *s)
+{
+    int n = -get_bits_count(s) & 7;  /* bits needed to reach the boundary */
+    if (n)
+        skip_bits(s, n);
+    return s->buffer + (s->index >> 3);
+}
+
+/**
+ * If the vlc code is invalid and max_depth=1, then no bits will be removed.
+ * If the vlc code is invalid and max_depth>1, then the number of bits removed
+ * is undefined.
+ */
+#define GET_VLC(code, name, gb, table, bits, max_depth) \
+ do { \
+ int n, nb_bits; \
+ unsigned int index; \
+ \
+ index = SHOW_UBITS(name, gb, bits); \
+ code = table[index][0]; \
+ n = table[index][1]; \
+ \
+ if (max_depth > 1 && n < 0) { \
+ LAST_SKIP_BITS(name, gb, bits); \
+ UPDATE_CACHE(name, gb); \
+ \
+ nb_bits = -n; \
+ \
+ index = SHOW_UBITS(name, gb, nb_bits) + code; \
+ code = table[index][0]; \
+ n = table[index][1]; \
+ if (max_depth > 2 && n < 0) { \
+ LAST_SKIP_BITS(name, gb, nb_bits); \
+ UPDATE_CACHE(name, gb); \
+ \
+ nb_bits = -n; \
+ \
+ index = SHOW_UBITS(name, gb, nb_bits) + code; \
+ code = table[index][0]; \
+ n = table[index][1]; \
+ } \
+ } \
+ SKIP_BITS(name, gb, n); \
+ } while (0)
+
+#define GET_RL_VLC(level, run, name, gb, table, bits, \
+ max_depth, need_update) \
+ do { \
+ int n, nb_bits; \
+ unsigned int index; \
+ \
+ index = SHOW_UBITS(name, gb, bits); \
+ level = table[index].level; \
+ n = table[index].len; \
+ \
+ if (max_depth > 1 && n < 0) { \
+ SKIP_BITS(name, gb, bits); \
+ if (need_update) { \
+ UPDATE_CACHE(name, gb); \
+ } \
+ \
+ nb_bits = -n; \
+ \
+ index = SHOW_UBITS(name, gb, nb_bits) + level; \
+ level = table[index].level; \
+ n = table[index].len; \
+ if (max_depth > 2 && n < 0) { \
+ LAST_SKIP_BITS(name, gb, nb_bits); \
+ if (need_update) { \
+ UPDATE_CACHE(name, gb); \
+ } \
+ nb_bits = -n; \
+ \
+ index = SHOW_UBITS(name, gb, nb_bits) + level; \
+ level = table[index].level; \
+ n = table[index].len; \
+ } \
+ } \
+ run = table[index].run; \
+ SKIP_BITS(name, gb, n); \
+ } while (0)
+
+/**
+ * Parse a vlc code.
+ * @param bits is the number of bits which will be read at once, must be
+ * identical to nb_bits in init_vlc()
+ * @param max_depth is the number of times bits bits must be read to completely
+ * read the longest vlc code
+ * = (max_vlc_length + bits - 1) / bits
+ */
+static av_always_inline int get_vlc2(GetBitContext *s, VLC_TYPE (*table)[2],
+                                     int bits, int max_depth)
+{
+    int code;
+
+    OPEN_READER(re, s);
+    UPDATE_CACHE(re, s);
+
+    /* GET_VLC consumes the code's bits and leaves the symbol in 'code' */
+    GET_VLC(code, re, s, table, bits, max_depth);
+
+    CLOSE_READER(re, s);
+
+    return code;
+}
+
+/* Decode a short code: '0' -> 0, '10' -> 1, '11' -> 2. */
+static inline int decode012(GetBitContext *gb)
+{
+    int n;
+    n = get_bits1(gb);
+    if (n == 0)
+        return 0;
+    else
+        return get_bits1(gb) + 1;
+}
+
+/* Decode a short code: '1' -> 0, '01' -> 1, '00' -> 2. */
+static inline int decode210(GetBitContext *gb)
+{
+    if (get_bits1(gb))
+        return 0;
+    else
+        return 2 - get_bits1(gb);
+}
+
+/* Bits still available; may go negative once the checked reader has been
+ * pushed past the end (index can reach size_in_bits_plus8). */
+static inline int get_bits_left(GetBitContext *gb)
+{
+    return gb->size_in_bits - get_bits_count(gb);
+}
+
+/* Skip groups of "1 continuation bit + 8 data bits" until a 0 stop bit.
+ * Returns 0 on success, AVERROR_INVALIDDATA if the bitstream runs out. */
+static inline int skip_1stop_8data_bits(GetBitContext *gb)
+{
+    if (get_bits_left(gb) <= 0)
+        return AVERROR_INVALIDDATA;
+
+    while (get_bits1(gb)) {
+        skip_bits(gb, 8);
+        if (get_bits_left(gb) <= 0)
+            return AVERROR_INVALIDDATA;
+    }
+
+    return 0;
+}
+
+#endif /* AVCODEC_GET_BITS_H */
diff --git a/media/ffvpx/libavcodec/golomb.c b/media/ffvpx/libavcodec/golomb.c
new file mode 100644
index 000000000..937ac22ce
--- /dev/null
+++ b/media/ffvpx/libavcodec/golomb.c
@@ -0,0 +1,173 @@
+/*
+ * exp golomb vlc stuff
+ * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * @brief
+ * exp golomb vlc stuff
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include "libavutil/common.h"
+
+const uint8_t ff_golomb_vlc_len[512]={
+19,17,15,15,13,13,13,13,11,11,11,11,11,11,11,11,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
+};
+
+const uint8_t ff_ue_golomb_vlc_code[512]={
+32,32,32,32,32,32,32,32,31,32,32,32,32,32,32,32,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,
+ 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9,10,10,10,10,11,11,11,11,12,12,12,12,13,13,13,13,14,14,14,14,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+const int8_t ff_se_golomb_vlc_code[512]={
+ 17, 17, 17, 17, 17, 17, 17, 17, 16, 17, 17, 17, 17, 17, 17, 17, 8, -8, 9, -9, 10,-10, 11,-11, 12,-12, 13,-13, 14,-14, 15,-15,
+ 4, 4, 4, 4, -4, -4, -4, -4, 5, 5, 5, 5, -5, -5, -5, -5, 6, 6, 6, 6, -6, -6, -6, -6, 7, 7, 7, 7, -7, -7, -7, -7,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+
+const uint8_t ff_ue_golomb_len[256]={
+ 1, 3, 3, 5, 5, 5, 5, 7, 7, 7, 7, 7, 7, 7, 7, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,11,
+11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,13,
+13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
+13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,15,
+15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
+15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
+15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
+15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,17,
+};
+
+const uint8_t ff_interleaved_golomb_vlc_len[256]={
+9,9,7,7,9,9,7,7,5,5,5,5,5,5,5,5,
+9,9,7,7,9,9,7,7,5,5,5,5,5,5,5,5,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+9,9,7,7,9,9,7,7,5,5,5,5,5,5,5,5,
+9,9,7,7,9,9,7,7,5,5,5,5,5,5,5,5,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+};
+
+const uint8_t ff_interleaved_ue_golomb_vlc_code[256]={
+ 15,16,7, 7, 17,18,8, 8, 3, 3, 3, 3, 3, 3, 3, 3,
+ 19,20,9, 9, 21,22,10,10,4, 4, 4, 4, 4, 4, 4, 4,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 23,24,11,11,25,26,12,12,5, 5, 5, 5, 5, 5, 5, 5,
+ 27,28,13,13,29,30,14,14,6, 6, 6, 6, 6, 6, 6, 6,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+const int8_t ff_interleaved_se_golomb_vlc_code[256]={
+ 8, -8, 4, 4, 9, -9, -4, -4, 2, 2, 2, 2, 2, 2, 2, 2,
+ 10,-10, 5, 5, 11,-11, -5, -5, -2, -2, -2, -2, -2, -2, -2, -2,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 12,-12, 6, 6, 13,-13, -6, -6, 3, 3, 3, 3, 3, 3, 3, 3,
+ 14,-14, 7, 7, 15,-15, -7, -7, -3, -3, -3, -3, -3, -3, -3, -3,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+const uint8_t ff_interleaved_dirac_golomb_vlc_code[256]={
+0, 1, 0, 0, 2, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
+4, 5, 2, 2, 6, 7, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+8, 9, 4, 4, 10,11,5, 5, 2, 2, 2, 2, 2, 2, 2, 2,
+12,13,6, 6, 14,15,7, 7, 3, 3, 3, 3, 3, 3, 3, 3,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,};
diff --git a/media/ffvpx/libavcodec/golomb.h b/media/ffvpx/libavcodec/golomb.h
new file mode 100644
index 000000000..917ea54e1
--- /dev/null
+++ b/media/ffvpx/libavcodec/golomb.h
@@ -0,0 +1,579 @@
+/*
+ * exp golomb vlc stuff
+ * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2004 Alex Beregszaszi
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * @brief
+ * exp golomb vlc stuff
+ * @author Michael Niedermayer <michaelni@gmx.at> and Alex Beregszaszi
+ */
+
+#ifndef AVCODEC_GOLOMB_H
+#define AVCODEC_GOLOMB_H
+
+#include <stdint.h>
+
+#include "get_bits.h"
+#include "put_bits.h"
+
+#define INVALID_VLC 0x80000000
+
+extern const uint8_t ff_golomb_vlc_len[512];
+extern const uint8_t ff_ue_golomb_vlc_code[512];
+extern const int8_t ff_se_golomb_vlc_code[512];
+extern const uint8_t ff_ue_golomb_len[256];
+
+extern const uint8_t ff_interleaved_golomb_vlc_len[256];
+extern const uint8_t ff_interleaved_ue_golomb_vlc_code[256];
+extern const int8_t ff_interleaved_se_golomb_vlc_code[256];
+extern const uint8_t ff_interleaved_dirac_golomb_vlc_code[256];
+
+/**
+ * Read an unsigned Exp-Golomb code in the range 0 to 8190.
+ *
+ * @return the decoded value, or AVERROR_INVALIDDATA (with a log message)
+ *         when the zero prefix is too long for the 32-bit cache.
+ */
+static inline int get_ue_golomb(GetBitContext *gb)
+{
+    unsigned int buf;
+
+    OPEN_READER(re, gb);
+    UPDATE_CACHE(re, gb);
+    buf = GET_CACHE(re, gb);
+
+    if (buf >= (1 << 27)) {
+        /* at most 4 leading zeros: code is <= 9 bits, use the LUTs */
+        buf >>= 32 - 9;
+        LAST_SKIP_BITS(re, gb, ff_golomb_vlc_len[buf]);
+        CLOSE_READER(re, gb);
+
+        return ff_ue_golomb_vlc_code[buf];
+    } else {
+        /* long code: compute the code length arithmetically */
+        int log = 2 * av_log2(buf) - 31;
+        LAST_SKIP_BITS(re, gb, 32 - log);
+        CLOSE_READER(re, gb);
+        if (log < 7) {
+            av_log(NULL, AV_LOG_ERROR, "Invalid UE golomb code\n");
+            return AVERROR_INVALIDDATA;
+        }
+        buf >>= log;
+        buf--;
+
+        return buf;
+    }
+}
+
+/**
+ * Read an unsigned Exp-Golomb code in the range 0 to UINT32_MAX-1.
+ * The zero prefix (up to 31 bits) is skipped first, then the (log+1)-bit
+ * suffix is read with get_bits_long().
+ */
+static inline unsigned get_ue_golomb_long(GetBitContext *gb)
+{
+    unsigned buf, log;
+
+    buf = show_bits_long(gb, 32);
+    log = 31 - av_log2(buf);
+    skip_bits_long(gb, log);
+
+    return get_bits_long(gb, log + 1) - 1;
+}
+
+/**
+ * read unsigned exp golomb code, constraint to a max of 31.
+ * the return value is undefined if the stored value exceeds 31.
+ */
+static inline int get_ue_golomb_31(GetBitContext *gb)
+{
+    unsigned int buf;
+
+    OPEN_READER(re, gb);
+    UPDATE_CACHE(re, gb);
+    buf = GET_CACHE(re, gb);
+
+    buf >>= 32 - 9;  /* single 9-bit LUT lookup; no long-code validation */
+    LAST_SKIP_BITS(re, gb, ff_golomb_vlc_len[buf]);
+    CLOSE_READER(re, gb);
+
+    return ff_ue_golomb_vlc_code[buf];
+}
+
+/* Read an "interleaved" exp-Golomb code where continuation and data bits
+ * alternate (Dirac-style). Short codes hit the 8-bit LUT fast path; longer
+ * codes accumulate 4 data bits per loop iteration. */
+static inline unsigned get_interleaved_ue_golomb(GetBitContext *gb)
+{
+    uint32_t buf;
+
+    OPEN_READER(re, gb);
+    UPDATE_CACHE(re, gb);
+    buf = GET_CACHE(re, gb);
+
+    if (buf & 0xAA800000) {
+        buf >>= 32 - 8;
+        LAST_SKIP_BITS(re, gb, ff_interleaved_golomb_vlc_len[buf]);
+        CLOSE_READER(re, gb);
+
+        return ff_interleaved_ue_golomb_vlc_code[buf];
+    } else {
+        unsigned ret = 1;
+
+        do {
+            buf >>= 32 - 8;
+            LAST_SKIP_BITS(re, gb,
+                           FFMIN(ff_interleaved_golomb_vlc_len[buf], 8));
+
+            if (ff_interleaved_golomb_vlc_len[buf] != 9) {
+                /* terminator seen within this byte: finish the value */
+                ret <<= (ff_interleaved_golomb_vlc_len[buf] - 1) >> 1;
+                ret |= ff_interleaved_dirac_golomb_vlc_code[buf];
+                break;
+            }
+            ret = (ret << 4) | ff_interleaved_dirac_golomb_vlc_code[buf];
+            UPDATE_CACHE(re, gb);
+            buf = GET_CACHE(re, gb);
+        } while (ret<0x8000000U && BITS_AVAILABLE(re, gb));
+
+        CLOSE_READER(re, gb);
+        return ret - 1;
+    }
+}
+
+/**
+ * read unsigned truncated exp golomb code.
+ * With range 1 the value is always 0 and no bits are consumed; with range 2
+ * a single inverted bit is read; otherwise a full ue code is read.
+ */
+static inline int get_te0_golomb(GetBitContext *gb, int range)
+{
+    av_assert2(range >= 1);
+
+    if (range == 1)
+        return 0;
+    else if (range == 2)
+        return get_bits1(gb) ^ 1;
+    else
+        return get_ue_golomb(gb);
+}
+
+/**
+ * read unsigned truncated exp golomb code.
+ * Unlike get_te0_golomb(), range == 1 is not special-cased here: a full ue
+ * code is read in that case as well.
+ */
+static inline int get_te_golomb(GetBitContext *gb, int range)
+{
+    av_assert2(range >= 1);
+
+    if (range == 2)
+        return get_bits1(gb) ^ 1;
+    else
+        return get_ue_golomb(gb);
+}
+
+/**
+ * read signed exp golomb code.
+ */
+static inline int get_se_golomb(GetBitContext *gb)
+{
+    unsigned int buf;
+
+    OPEN_READER(re, gb);
+    UPDATE_CACHE(re, gb);
+    buf = GET_CACHE(re, gb);
+
+    if (buf >= (1 << 27)) {
+        /* short code: direct 9-bit LUT lookup */
+        buf >>= 32 - 9;
+        LAST_SKIP_BITS(re, gb, ff_golomb_vlc_len[buf]);
+        CLOSE_READER(re, gb);
+
+        return ff_se_golomb_vlc_code[buf];
+    } else {
+        int log = av_log2(buf), sign;
+        LAST_SKIP_BITS(re, gb, 31 - log);
+        UPDATE_CACHE(re, gb);
+        buf = GET_CACHE(re, gb);
+
+        buf >>= log;
+
+        LAST_SKIP_BITS(re, gb, 32 - log);
+        CLOSE_READER(re, gb);
+
+        sign = -(buf & 1);
+        buf = ((buf >> 1) ^ sign) - sign;  /* zigzag map to a signed value */
+
+        return buf;
+    }
+}
+
+/* Signed exp-Golomb via get_ue_golomb_long(): ue value v maps to
+ * (v + 1) / 2 for odd v and -(v / 2) for even v. */
+static inline int get_se_golomb_long(GetBitContext *gb)
+{
+    unsigned int buf = get_ue_golomb_long(gb);
+    int sign = (buf & 1) - 1;
+    return ((buf >> 1) ^ sign) + 1;
+}
+
+/* Signed variant of get_interleaved_ue_golomb(); returns INVALID_VLC when
+ * no terminating bit can be found in the code. */
+static inline int get_interleaved_se_golomb(GetBitContext *gb)
+{
+    unsigned int buf;
+
+    OPEN_READER(re, gb);
+    UPDATE_CACHE(re, gb);
+    buf = GET_CACHE(re, gb);
+
+    if (buf & 0xAA800000) {
+        /* short code: 8-bit LUT fast path */
+        buf >>= 32 - 8;
+        LAST_SKIP_BITS(re, gb, ff_interleaved_golomb_vlc_len[buf]);
+        CLOSE_READER(re, gb);
+
+        return ff_interleaved_se_golomb_vlc_code[buf];
+    } else {
+        int log;
+        LAST_SKIP_BITS(re, gb, 8);
+        UPDATE_CACHE(re, gb);
+        buf |= 1 | (GET_CACHE(re, gb) >> 8);
+
+        if ((buf & 0xAAAAAAAA) == 0)
+            return INVALID_VLC;
+
+        for (log = 31; (buf & 0x80000000) == 0; log--)
+            buf = (buf << 2) - ((buf << log) >> (log - 1)) + (buf >> 30);
+
+        LAST_SKIP_BITS(re, gb, 63 - 2 * log - 8);
+        CLOSE_READER(re, gb);
+
+        return (signed) (((((buf << log) >> log) - 1) ^ -(buf & 0x1)) + 1) >> 1;
+    }
+}
+
+/* Dirac signed code: the magnitude is coded first; a sign bit follows
+ * only for nonzero values (1 = negative). */
+static inline int dirac_get_se_golomb(GetBitContext *gb)
+{
+    uint32_t magnitude = get_interleaved_ue_golomb(gb);
+
+    if (magnitude == 0)
+        return magnitude;
+
+    if (get_bits1(gb))
+        return -(int32_t)magnitude;
+    return magnitude;
+}
+
+/**
+ * read unsigned golomb rice code (ffv1).
+ */
+static inline int get_ur_golomb(GetBitContext *gb, int k, int limit,
+                                int esc_len)
+{
+    unsigned int buf;
+    int log;
+
+    OPEN_READER(re, gb);
+    UPDATE_CACHE(re, gb);
+    buf = GET_CACHE(re, gb);
+
+    /* position of the stop bit; 31 - log leading zeros = quotient */
+    log = av_log2(buf);
+
+    if (log > 31 - limit) {
+        /* Regular code: combine the quotient with the k-bit remainder. */
+        buf >>= log - k;
+        buf += (30U - log) << k;
+        LAST_SKIP_BITS(re, gb, 32 + k - log);
+        CLOSE_READER(re, gb);
+
+        return buf;
+    } else {
+        /* Escape: skip the limit-long prefix and read the raw value,
+         * biased by limit - 1 (mirrors set_ur_golomb()). */
+        LAST_SKIP_BITS(re, gb, limit);
+        UPDATE_CACHE(re, gb);
+
+        buf = SHOW_UBITS(re, gb, esc_len);
+
+        LAST_SKIP_BITS(re, gb, esc_len);
+        CLOSE_READER(re, gb);
+
+        return buf + limit - 1;
+    }
+}
+
+/**
+ * read unsigned golomb rice code (jpegls).
+ */
+static inline int get_ur_golomb_jpegls(GetBitContext *gb, int k, int limit,
+                                       int esc_len)
+{
+    unsigned int buf;
+    int log;
+
+    OPEN_READER(re, gb);
+    UPDATE_CACHE(re, gb);
+    buf = GET_CACHE(re, gb);
+
+    log = av_log2(buf);
+
+    if (log - k >= 32 - MIN_CACHE_BITS + (MIN_CACHE_BITS == 32) &&
+        32 - log < limit) {
+        /* Fast path: zero prefix, stop bit and k-bit remainder are all
+         * available in the cache at once. */
+        buf >>= log - k;
+        buf += (30U - log) << k;
+        LAST_SKIP_BITS(re, gb, 32 + k - log);
+        CLOSE_READER(re, gb);
+
+        return buf;
+    } else {
+        /* Slow path: count the zero prefix bit by bit, refilling the
+         * cache and bailing out at the end of the buffer. */
+        int i;
+        for (i = 0; i < limit && SHOW_UBITS(re, gb, 1) == 0; i++) {
+            if (gb->size_in_bits <= re_index)
+                return -1;
+            LAST_SKIP_BITS(re, gb, 1);
+            UPDATE_CACHE(re, gb);
+        }
+        SKIP_BITS(re, gb, 1);
+
+        if (i < limit - 1) {
+            /* regular code: read the k-bit remainder, split into two
+             * reads when k exceeds what the cache holds at once */
+            if (k) {
+                if (k > MIN_CACHE_BITS - 1) {
+                    buf = SHOW_UBITS(re, gb, 16) << (k-16);
+                    LAST_SKIP_BITS(re, gb, 16);
+                    UPDATE_CACHE(re, gb);
+                    buf |= SHOW_UBITS(re, gb, k-16);
+                    LAST_SKIP_BITS(re, gb, k-16);
+                } else {
+                    buf = SHOW_UBITS(re, gb, k);
+                    LAST_SKIP_BITS(re, gb, k);
+                }
+            } else {
+                buf = 0;
+            }
+
+            CLOSE_READER(re, gb);
+            return buf + (i << k);
+        } else if (i == limit - 1) {
+            /* escape code: the value follows verbatim in esc_len bits */
+            buf = SHOW_UBITS(re, gb, esc_len);
+            LAST_SKIP_BITS(re, gb, esc_len);
+            CLOSE_READER(re, gb);
+
+            return buf + 1;
+        } else
+            /* prefix longer than limit: invalid bitstream */
+            return -1;
+    }
+}
+
+/**
+ * read signed golomb rice code (ffv1).
+ *
+ * The unsigned code is unmapped with the zigzag scheme
+ * 0,1,2,3,... -> 0,-1,1,-2,...
+ */
+static inline int get_sr_golomb(GetBitContext *gb, int k, int limit,
+                                int esc_len)
+{
+    unsigned code = get_ur_golomb(gb, k, limit, esc_len);
+    unsigned flip = -(code & 1);    /* all-ones when the code is odd */
+
+    return (code >> 1) ^ flip;
+}
+
+/**
+ * read signed golomb rice code (flac).
+ *
+ * Identical to get_sr_golomb() except that the jpegls-style unsigned
+ * reader is used underneath.
+ */
+static inline int get_sr_golomb_flac(GetBitContext *gb, int k, int limit,
+                                     int esc_len)
+{
+    unsigned code = get_ur_golomb_jpegls(gb, k, limit, esc_len);
+    unsigned flip = -(code & 1);    /* all-ones when the code is odd */
+
+    return (code >> 1) ^ flip;
+}
+
+/**
+ * read unsigned golomb rice code (shorten).
+ */
+static inline unsigned int get_ur_golomb_shorten(GetBitContext *gb, int k)
+{
+    /* Shorten has no escape mechanism: reuse the jpegls reader with an
+     * effectively unlimited quotient and a zero-length escape code. */
+    return get_ur_golomb_jpegls(gb, k, INT_MAX, 0);
+}
+
+/**
+ * read signed golomb rice code (shorten).
+ *
+ * k + 1 bits of remainder are read so the low bit can act as the sign.
+ */
+static inline int get_sr_golomb_shorten(GetBitContext *gb, int k)
+{
+    int code = get_ur_golomb_jpegls(gb, k + 1, INT_MAX, 0);
+
+    if (code & 1)
+        return ~(code >> 1);    /* odd codes map to negative values */
+    return code >> 1;
+}
+
+#ifdef TRACE
+
+/* Tracing wrapper around get_ue_golomb(): logs the raw bits, code
+ * length, decoded value and bit position, then returns the value. */
+static inline int get_ue(GetBitContext *s, const char *file, const char *func,
+                         int line)
+{
+    int show = show_bits(s, 24);
+    int pos = get_bits_count(s);
+    int i = get_ue_golomb(s);
+    int len = get_bits_count(s) - pos;
+    int bits = show >> (24 - len);
+
+    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d ue @%5d in %s %s:%d\n",
+           bits, len, i, pos, file, func, line);
+
+    return i;
+}
+
+/* Tracing wrapper around get_se_golomb(): logs the raw bits, code
+ * length, decoded value and bit position, then returns the value. */
+static inline int get_se(GetBitContext *s, const char *file, const char *func,
+                         int line)
+{
+    int show = show_bits(s, 24);
+    int pos = get_bits_count(s);
+    int i = get_se_golomb(s);
+    int len = get_bits_count(s) - pos;
+    int bits = show >> (24 - len);
+
+    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d se @%5d in %s %s:%d\n",
+           bits, len, i, pos, file, func, line);
+
+    return i;
+}
+
+/* Tracing wrapper around get_te0_golomb(): logs the raw bits, code
+ * length, decoded value and bit position, then returns the value.
+ * `file` is const-qualified for consistency with get_ue()/get_se() and
+ * because the macros below pass the string literal __FILE__. */
+static inline int get_te(GetBitContext *s, int r, const char *file,
+                         const char *func, int line)
+{
+    int show = show_bits(s, 24);
+    int pos = get_bits_count(s);
+    int i = get_te0_golomb(s, r);
+    int len = get_bits_count(s) - pos;
+    int bits = show >> (24 - len);
+
+    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d te @%5d in %s %s:%d\n",
+           bits, len, i, pos, file, func, line);
+
+    return i;
+}
+
+#define get_ue_golomb(a) get_ue(a, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+#define get_se_golomb(a) get_se(a, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+#define get_te_golomb(a, r) get_te(a, r, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+#define get_te0_golomb(a, r) get_te(a, r, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+
+#endif /* TRACE */
+
+/**
+ * write unsigned exp golomb code.
+ */
+static inline void set_ue_golomb(PutBitContext *pb, int i)
+{
+    av_assert2(i >= 0);
+
+    if (i < 256)
+        /* code lengths for small values come from a lookup table */
+        put_bits(pb, ff_ue_golomb_len[i], i + 1);
+    else {
+        /* ue(v) is e zeros followed by the (e+1)-bit value i + 1, whose
+         * MSB acts as the stop bit: 2e + 1 bits total */
+        int e = av_log2(i + 1);
+        put_bits(pb, 2 * e + 1, i + 1);
+    }
+}
+
+/**
+ * write truncated unsigned exp golomb code.
+ */
+static inline void set_te_golomb(PutBitContext *pb, int i, int range)
+{
+    av_assert2(range >= 1);
+    av_assert2(i <= range);
+
+    if (range == 2)
+        /* two possible values: one inverted bit (mirrors get_te_golomb) */
+        put_bits(pb, 1, i ^ 1);
+    else
+        set_ue_golomb(pb, i);
+}
+
+/**
+ * write signed exp golomb code. 16 bits at most.
+ *
+ * The signed value is zigzag-mapped onto the unsigned code space
+ * (0,1,-1,2,-2,... -> 0,1,2,3,4,...), the inverse of get_se_golomb().
+ * The two historical #if 0 alternatives computed the same mapping and
+ * have been removed as dead code.
+ */
+static inline void set_se_golomb(PutBitContext *pb, int i)
+{
+    i = 2 * i - 1;
+    if (i < 0)
+        i ^= -1;    /* equivalent to ~i: flips negatives onto even codes */
+    set_ue_golomb(pb, i);
+}
+
+/**
+ * write unsigned golomb rice code (ffv1).
+ */
+static inline void set_ur_golomb(PutBitContext *pb, int i, int k, int limit,
+                                 int esc_len)
+{
+    int e;
+
+    av_assert2(i >= 0);
+
+    /* quotient of the Rice code */
+    e = i >> k;
+    if (e < limit)
+        /* e zeros, a stop 1, then the k low-order remainder bits */
+        put_bits(pb, e + k + 1, (1 << k) + av_mod_uintp2(i, k));
+    else
+        /* escape: i - limit + 1 in limit + esc_len bits; the high bits
+         * form the zero prefix that get_ur_golomb() skips */
+        put_bits(pb, limit + esc_len, i - limit + 1);
+}
+
+/**
+ * write unsigned golomb rice code (jpegls).
+ */
+static inline void set_ur_golomb_jpegls(PutBitContext *pb, int i, int k,
+                                        int limit, int esc_len)
+{
+    int e;
+
+    av_assert2(i >= 0);
+
+    /* unary part: (i >> k) zeros followed by a stop 1 bit */
+    e = (i >> k) + 1;
+    if (e < limit) {
+        /* long zero runs are emitted in 31-bit chunks */
+        while (e > 31) {
+            put_bits(pb, 31, 0);
+            e -= 31;
+        }
+        put_bits(pb, e, 1);
+        if (k)
+            put_sbits(pb, k, i);
+    } else {
+        /* escape: limit - 1 zeros, a stop bit, then the value biased
+         * by 1 in esc_len bits (mirrors get_ur_golomb_jpegls()) */
+        while (limit > 31) {
+            put_bits(pb, 31, 0);
+            limit -= 31;
+        }
+        put_bits(pb, limit, 1);
+        put_bits(pb, esc_len, i - 1);
+    }
+}
+
+/**
+ * write signed golomb rice code (ffv1).
+ *
+ * Non-negative values map to even codes (2*i), negative ones to odd
+ * codes (-2*i - 1); the inverse of get_sr_golomb().
+ */
+static inline void set_sr_golomb(PutBitContext *pb, int i, int k, int limit,
+                                 int esc_len)
+{
+    int code = i >= 0 ? 2 * i : -2 * i - 1;
+
+    set_ur_golomb(pb, code, k, limit, esc_len);
+}
+
+/**
+ * write signed golomb rice code (flac).
+ *
+ * Same zigzag mapping as set_sr_golomb(), but written with the
+ * jpegls-style unsigned writer underneath.
+ */
+static inline void set_sr_golomb_flac(PutBitContext *pb, int i, int k,
+                                      int limit, int esc_len)
+{
+    int code = i >= 0 ? 2 * i : -2 * i - 1;
+
+    set_ur_golomb_jpegls(pb, code, k, limit, esc_len);
+}
+
+#endif /* AVCODEC_GOLOMB_H */
diff --git a/media/ffvpx/libavcodec/h263dsp.h b/media/ffvpx/libavcodec/h263dsp.h
new file mode 100644
index 000000000..1abea3ca8
--- /dev/null
+++ b/media/ffvpx/libavcodec/h263dsp.h
@@ -0,0 +1,35 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_H263DSP_H
+#define AVCODEC_H263DSP_H
+
+#include <stdint.h>
+
+extern const uint8_t ff_h263_loop_filter_strength[32];
+
+/** Function pointers for the H.263 in-loop deblocking filter. */
+typedef struct H263DSPContext {
+    /* horizontal / vertical edge filters; NOTE(review): qscale is
+     * presumably used to index ff_h263_loop_filter_strength -- confirm
+     * against h263dsp.c */
+    void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale);
+    void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale);
+} H263DSPContext;
+
+/* generic initializer plus per-architecture overrides */
+void ff_h263dsp_init(H263DSPContext *ctx);
+void ff_h263dsp_init_x86(H263DSPContext *ctx);
+void ff_h263dsp_init_mips(H263DSPContext *ctx);
+
+#endif /* AVCODEC_H263DSP_H */
diff --git a/media/ffvpx/libavcodec/h264chroma.h b/media/ffvpx/libavcodec/h264chroma.h
new file mode 100644
index 000000000..e0f45ad13
--- /dev/null
+++ b/media/ffvpx/libavcodec/h264chroma.h
@@ -0,0 +1,39 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_H264CHROMA_H
+#define AVCODEC_H264CHROMA_H
+
+#include <stdint.h>
+
+/** H.264 chroma MC function: dst is 8-byte aligned, src may be unaligned. */
+typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y);
+
+typedef struct H264ChromaContext {
+    /* put (copy) and avg (rounding average) variants; NOTE(review):
+     * the 4 entries are presumably indexed by block-size class --
+     * confirm against h264chroma.c */
+    h264_chroma_mc_func put_h264_chroma_pixels_tab[4];
+    h264_chroma_mc_func avg_h264_chroma_pixels_tab[4];
+} H264ChromaContext;
+
+/* generic initializer plus per-architecture overrides */
+void ff_h264chroma_init(H264ChromaContext *c, int bit_depth);
+
+void ff_h264chroma_init_aarch64(H264ChromaContext *c, int bit_depth);
+void ff_h264chroma_init_arm(H264ChromaContext *c, int bit_depth);
+void ff_h264chroma_init_ppc(H264ChromaContext *c, int bit_depth);
+void ff_h264chroma_init_x86(H264ChromaContext *c, int bit_depth);
+void ff_h264chroma_init_mips(H264ChromaContext *c, int bit_depth);
+
+#endif /* AVCODEC_H264CHROMA_H */
diff --git a/media/ffvpx/libavcodec/h264dsp.h b/media/ffvpx/libavcodec/h264dsp.h
new file mode 100644
index 000000000..7f24376b8
--- /dev/null
+++ b/media/ffvpx/libavcodec/h264dsp.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2003-2010 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * H.264 DSP functions.
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#ifndef AVCODEC_H264DSP_H
+#define AVCODEC_H264DSP_H
+
+#include <stdint.h>
+
+typedef void (*h264_weight_func)(uint8_t *block, int stride, int height,
+ int log2_denom, int weight, int offset);
+typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src,
+ int stride, int height, int log2_denom,
+ int weightd, int weights, int offset);
+
+/**
+ * Context for storing H.264 DSP functions
+ */
+typedef struct H264DSPContext {
+ /* weighted MC */
+ h264_weight_func weight_h264_pixels_tab[4];
+ h264_biweight_func biweight_h264_pixels_tab[4];
+
+ /* loop filter */
+ void (*h264_v_loop_filter_luma)(uint8_t *pix /*align 16*/, int stride,
+ int alpha, int beta, int8_t *tc0);
+ void (*h264_h_loop_filter_luma)(uint8_t *pix /*align 4 */, int stride,
+ int alpha, int beta, int8_t *tc0);
+ void (*h264_h_loop_filter_luma_mbaff)(uint8_t *pix /*align 16*/, int stride,
+ int alpha, int beta, int8_t *tc0);
+ /* v/h_loop_filter_luma_intra: align 16 */
+ void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride,
+ int alpha, int beta);
+ void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride,
+ int alpha, int beta);
+ void (*h264_h_loop_filter_luma_mbaff_intra)(uint8_t *pix /*align 16*/,
+ int stride, int alpha, int beta);
+ void (*h264_v_loop_filter_chroma)(uint8_t *pix /*align 8*/, int stride,
+ int alpha, int beta, int8_t *tc0);
+ void (*h264_h_loop_filter_chroma)(uint8_t *pix /*align 4*/, int stride,
+ int alpha, int beta, int8_t *tc0);
+ void (*h264_h_loop_filter_chroma_mbaff)(uint8_t *pix /*align 8*/,
+ int stride, int alpha, int beta,
+ int8_t *tc0);
+ void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix /*align 8*/,
+ int stride, int alpha, int beta);
+ void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix /*align 8*/,
+ int stride, int alpha, int beta);
+ void (*h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix /*align 8*/,
+ int stride, int alpha, int beta);
+ // h264_loop_filter_strength: simd only. the C version is inlined in h264_loopfilter.c
+ void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40],
+ int8_t ref[2][40], int16_t mv[2][40][2],
+ int bidir, int edges, int step,
+ int mask_mv0, int mask_mv1, int field);
+
+ /* IDCT */
+ void (*h264_idct_add)(uint8_t *dst /*align 4*/,
+ int16_t *block /*align 16*/, int stride);
+ void (*h264_idct8_add)(uint8_t *dst /*align 8*/,
+ int16_t *block /*align 16*/, int stride);
+ void (*h264_idct_dc_add)(uint8_t *dst /*align 4*/,
+ int16_t *block /*align 16*/, int stride);
+ void (*h264_idct8_dc_add)(uint8_t *dst /*align 8*/,
+ int16_t *block /*align 16*/, int stride);
+
+ void (*h264_idct_add16)(uint8_t *dst /*align 16*/, const int *blockoffset,
+ int16_t *block /*align 16*/, int stride,
+ const uint8_t nnzc[15 * 8]);
+ void (*h264_idct8_add4)(uint8_t *dst /*align 16*/, const int *blockoffset,
+ int16_t *block /*align 16*/, int stride,
+ const uint8_t nnzc[15 * 8]);
+ void (*h264_idct_add8)(uint8_t **dst /*align 16*/, const int *blockoffset,
+ int16_t *block /*align 16*/, int stride,
+ const uint8_t nnzc[15 * 8]);
+ void (*h264_idct_add16intra)(uint8_t *dst /*align 16*/, const int *blockoffset,
+ int16_t *block /*align 16*/,
+ int stride, const uint8_t nnzc[15 * 8]);
+ void (*h264_luma_dc_dequant_idct)(int16_t *output,
+ int16_t *input /*align 16*/, int qmul);
+ void (*h264_chroma_dc_dequant_idct)(int16_t *block, int qmul);
+
+ /* bypass-transform */
+ void (*h264_add_pixels8_clear)(uint8_t *dst, int16_t *block, int stride);
+ void (*h264_add_pixels4_clear)(uint8_t *dst, int16_t *block, int stride);
+
+ /**
+ * Search buf from the start for up to size bytes. Return the index
+ * of a zero byte, or >= size if not found. Ideally, use lookahead
+ * to filter out any zero bytes that are known to not be followed by
+ * one or more further zero bytes and a one byte. Better still, filter
+ * out any bytes that form the trailing_zero_8bits syntax element too.
+ */
+ int (*startcode_find_candidate)(const uint8_t *buf, int size);
+} H264DSPContext;
+
+void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
+ const int chroma_format_idc);
+void ff_h264dsp_init_aarch64(H264DSPContext *c, const int bit_depth,
+ const int chroma_format_idc);
+void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth,
+ const int chroma_format_idc);
+void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth,
+ const int chroma_format_idc);
+void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
+ const int chroma_format_idc);
+void ff_h264dsp_init_mips(H264DSPContext *c, const int bit_depth,
+ const int chroma_format_idc);
+
+#endif /* AVCODEC_H264DSP_H */
diff --git a/media/ffvpx/libavcodec/h264pred.c b/media/ffvpx/libavcodec/h264pred.c
new file mode 100644
index 000000000..5632a58fd
--- /dev/null
+++ b/media/ffvpx/libavcodec/h264pred.c
@@ -0,0 +1,603 @@
+/*
+ * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
+ * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * H.264 / AVC / MPEG-4 part10 prediction functions.
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
+#include "libavutil/intreadwrite.h"
+#include "avcodec.h"
+#include "h264pred.h"
+
+#define BIT_DEPTH 8
+#include "h264pred_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 9
+#include "h264pred_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 10
+#include "h264pred_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 12
+#include "h264pred_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 14
+#include "h264pred_template.c"
+#undef BIT_DEPTH
+
+/* VP8 vertical prediction: build one row from the 3-tap smoothed top
+ * edge ((a + 2b + c + 2) >> 2) and replicate it over all four rows.
+ * NOTE(review): the LOAD_*_EDGE macros defining t0..t4 are assumed to
+ * come from the included h264pred_template.c (not visible here). */
+static void pred4x4_vertical_vp8_c(uint8_t *src, const uint8_t *topright,
+                                   ptrdiff_t stride)
+{
+    const unsigned lt = src[-1-1*stride];  /* top-left corner sample */
+    LOAD_TOP_EDGE
+    LOAD_TOP_RIGHT_EDGE
+    uint32_t v = PACK_4U8((lt + 2*t0 + t1 + 2) >> 2,
+                          (t0 + 2*t1 + t2 + 2) >> 2,
+                          (t1 + 2*t2 + t3 + 2) >> 2,
+                          (t2 + 2*t3 + t4 + 2) >> 2);
+
+    AV_WN32A(src+0*stride, v);
+    AV_WN32A(src+1*stride, v);
+    AV_WN32A(src+2*stride, v);
+    AV_WN32A(src+3*stride, v);
+}
+
+/* VP8 horizontal prediction: each row is filled with one smoothed
+ * left-edge sample ((a + 2b + c + 2) >> 2); multiplying by 0x01010101
+ * splats the byte across the 32-bit word. */
+static void pred4x4_horizontal_vp8_c(uint8_t *src, const uint8_t *topright,
+                                     ptrdiff_t stride)
+{
+    const unsigned lt = src[-1-1*stride];
+    LOAD_LEFT_EDGE
+
+    AV_WN32A(src+0*stride, ((lt + 2*l0 + l1 + 2) >> 2)*0x01010101);
+    AV_WN32A(src+1*stride, ((l0 + 2*l1 + l2 + 2) >> 2)*0x01010101);
+    AV_WN32A(src+2*stride, ((l1 + 2*l2 + l3 + 2) >> 2)*0x01010101);
+    /* bottom row reuses l3 as the below-left sample (edge clamp) */
+    AV_WN32A(src+3*stride, ((l2 + 2*l3 + l3 + 2) >> 2)*0x01010101);
+}
+
+static void pred4x4_down_left_svq3_c(uint8_t *src, const uint8_t *topright,
+ ptrdiff_t stride)
+{
+ LOAD_TOP_EDGE
+ LOAD_LEFT_EDGE
+
+ src[0+0*stride]=(l1 + t1)>>1;
+ src[1+0*stride]=
+ src[0+1*stride]=(l2 + t2)>>1;
+ src[2+0*stride]=
+ src[1+1*stride]=
+ src[0+2*stride]=
+ src[3+0*stride]=
+ src[2+1*stride]=
+ src[1+2*stride]=
+ src[0+3*stride]=
+ src[3+1*stride]=
+ src[2+2*stride]=
+ src[1+3*stride]=
+ src[3+2*stride]=
+ src[2+3*stride]=
+ src[3+3*stride]=(l3 + t3)>>1;
+}
+
+static void pred4x4_down_left_rv40_c(uint8_t *src, const uint8_t *topright,
+ ptrdiff_t stride)
+{
+ LOAD_TOP_EDGE
+ LOAD_TOP_RIGHT_EDGE
+ LOAD_LEFT_EDGE
+ LOAD_DOWN_LEFT_EDGE
+
+ src[0+0*stride]=(t0 + t2 + 2*t1 + 2 + l0 + l2 + 2*l1 + 2)>>3;
+ src[1+0*stride]=
+ src[0+1*stride]=(t1 + t3 + 2*t2 + 2 + l1 + l3 + 2*l2 + 2)>>3;
+ src[2+0*stride]=
+ src[1+1*stride]=
+ src[0+2*stride]=(t2 + t4 + 2*t3 + 2 + l2 + l4 + 2*l3 + 2)>>3;
+ src[3+0*stride]=
+ src[2+1*stride]=
+ src[1+2*stride]=
+ src[0+3*stride]=(t3 + t5 + 2*t4 + 2 + l3 + l5 + 2*l4 + 2)>>3;
+ src[3+1*stride]=
+ src[2+2*stride]=
+ src[1+3*stride]=(t4 + t6 + 2*t5 + 2 + l4 + l6 + 2*l5 + 2)>>3;
+ src[3+2*stride]=
+ src[2+3*stride]=(t5 + t7 + 2*t6 + 2 + l5 + l7 + 2*l6 + 2)>>3;
+ src[3+3*stride]=(t6 + t7 + 1 + l6 + l7 + 1)>>2;
+}
+
+static void pred4x4_down_left_rv40_nodown_c(uint8_t *src,
+ const uint8_t *topright,
+ ptrdiff_t stride)
+{
+ LOAD_TOP_EDGE
+ LOAD_TOP_RIGHT_EDGE
+ LOAD_LEFT_EDGE
+
+ src[0+0*stride]=(t0 + t2 + 2*t1 + 2 + l0 + l2 + 2*l1 + 2)>>3;
+ src[1+0*stride]=
+ src[0+1*stride]=(t1 + t3 + 2*t2 + 2 + l1 + l3 + 2*l2 + 2)>>3;
+ src[2+0*stride]=
+ src[1+1*stride]=
+ src[0+2*stride]=(t2 + t4 + 2*t3 + 2 + l2 + 3*l3 + 2)>>3;
+ src[3+0*stride]=
+ src[2+1*stride]=
+ src[1+2*stride]=
+ src[0+3*stride]=(t3 + t5 + 2*t4 + 2 + l3*4 + 2)>>3;
+ src[3+1*stride]=
+ src[2+2*stride]=
+ src[1+3*stride]=(t4 + t6 + 2*t5 + 2 + l3*4 + 2)>>3;
+ src[3+2*stride]=
+ src[2+3*stride]=(t5 + t7 + 2*t6 + 2 + l3*4 + 2)>>3;
+ src[3+3*stride]=(t6 + t7 + 1 + 2*l3 + 1)>>2;
+}
+
+static void pred4x4_vertical_left_rv40(uint8_t *src, const uint8_t *topright,
+ ptrdiff_t stride,
+ const int l0, const int l1, const int l2,
+ const int l3, const int l4)
+{
+ LOAD_TOP_EDGE
+ LOAD_TOP_RIGHT_EDGE
+
+ src[0+0*stride]=(2*t0 + 2*t1 + l1 + 2*l2 + l3 + 4)>>3;
+ src[1+0*stride]=
+ src[0+2*stride]=(t1 + t2 + 1)>>1;
+ src[2+0*stride]=
+ src[1+2*stride]=(t2 + t3 + 1)>>1;
+ src[3+0*stride]=
+ src[2+2*stride]=(t3 + t4+ 1)>>1;
+ src[3+2*stride]=(t4 + t5+ 1)>>1;
+ src[0+1*stride]=(t0 + 2*t1 + t2 + l2 + 2*l3 + l4 + 4)>>3;
+ src[1+1*stride]=
+ src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
+ src[2+1*stride]=
+ src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
+ src[3+1*stride]=
+ src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
+ src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
+}
+
+static void pred4x4_vertical_left_rv40_c(uint8_t *src, const uint8_t *topright,
+ ptrdiff_t stride)
+{
+ LOAD_LEFT_EDGE
+ LOAD_DOWN_LEFT_EDGE
+
+ pred4x4_vertical_left_rv40(src, topright, stride, l0, l1, l2, l3, l4);
+}
+
+static void pred4x4_vertical_left_rv40_nodown_c(uint8_t *src,
+ const uint8_t *topright,
+ ptrdiff_t stride)
+{
+ LOAD_LEFT_EDGE
+
+ pred4x4_vertical_left_rv40(src, topright, stride, l0, l1, l2, l3, l3);
+}
+
+static void pred4x4_vertical_left_vp8_c(uint8_t *src, const uint8_t *topright,
+ ptrdiff_t stride)
+{
+ LOAD_TOP_EDGE
+ LOAD_TOP_RIGHT_EDGE
+
+ src[0+0*stride]=(t0 + t1 + 1)>>1;
+ src[1+0*stride]=
+ src[0+2*stride]=(t1 + t2 + 1)>>1;
+ src[2+0*stride]=
+ src[1+2*stride]=(t2 + t3 + 1)>>1;
+ src[3+0*stride]=
+ src[2+2*stride]=(t3 + t4 + 1)>>1;
+ src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
+ src[1+1*stride]=
+ src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
+ src[2+1*stride]=
+ src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
+ src[3+1*stride]=
+ src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
+ src[3+2*stride]=(t4 + 2*t5 + t6 + 2)>>2;
+ src[3+3*stride]=(t5 + 2*t6 + t7 + 2)>>2;
+}
+
+static void pred4x4_horizontal_up_rv40_c(uint8_t *src, const uint8_t *topright,
+ ptrdiff_t stride)
+{
+ LOAD_LEFT_EDGE
+ LOAD_DOWN_LEFT_EDGE
+ LOAD_TOP_EDGE
+ LOAD_TOP_RIGHT_EDGE
+
+ src[0+0*stride]=(t1 + 2*t2 + t3 + 2*l0 + 2*l1 + 4)>>3;
+ src[1+0*stride]=(t2 + 2*t3 + t4 + l0 + 2*l1 + l2 + 4)>>3;
+ src[2+0*stride]=
+ src[0+1*stride]=(t3 + 2*t4 + t5 + 2*l1 + 2*l2 + 4)>>3;
+ src[3+0*stride]=
+ src[1+1*stride]=(t4 + 2*t5 + t6 + l1 + 2*l2 + l3 + 4)>>3;
+ src[2+1*stride]=
+ src[0+2*stride]=(t5 + 2*t6 + t7 + 2*l2 + 2*l3 + 4)>>3;
+ src[3+1*stride]=
+ src[1+2*stride]=(t6 + 3*t7 + l2 + 3*l3 + 4)>>3;
+ src[3+2*stride]=
+ src[1+3*stride]=(l3 + 2*l4 + l5 + 2)>>2;
+ src[0+3*stride]=
+ src[2+2*stride]=(t6 + t7 + l3 + l4 + 2)>>2;
+ src[2+3*stride]=(l4 + l5 + 1)>>1;
+ src[3+3*stride]=(l4 + 2*l5 + l6 + 2)>>2;
+}
+
+static void pred4x4_horizontal_up_rv40_nodown_c(uint8_t *src,
+ const uint8_t *topright,
+ ptrdiff_t stride)
+{
+ LOAD_LEFT_EDGE
+ LOAD_TOP_EDGE
+ LOAD_TOP_RIGHT_EDGE
+
+ src[0+0*stride]=(t1 + 2*t2 + t3 + 2*l0 + 2*l1 + 4)>>3;
+ src[1+0*stride]=(t2 + 2*t3 + t4 + l0 + 2*l1 + l2 + 4)>>3;
+ src[2+0*stride]=
+ src[0+1*stride]=(t3 + 2*t4 + t5 + 2*l1 + 2*l2 + 4)>>3;
+ src[3+0*stride]=
+ src[1+1*stride]=(t4 + 2*t5 + t6 + l1 + 2*l2 + l3 + 4)>>3;
+ src[2+1*stride]=
+ src[0+2*stride]=(t5 + 2*t6 + t7 + 2*l2 + 2*l3 + 4)>>3;
+ src[3+1*stride]=
+ src[1+2*stride]=(t6 + 3*t7 + l2 + 3*l3 + 4)>>3;
+ src[3+2*stride]=
+ src[1+3*stride]=l3;
+ src[0+3*stride]=
+ src[2+2*stride]=(t6 + t7 + 2*l3 + 2)>>2;
+ src[2+3*stride]=
+ src[3+3*stride]=l3;
+}
+
+/* VP8 "True Motion" prediction: pred(x, y) = left(y) + top(x) - topleft,
+ * evaluated via the ff_crop_tab lookup (cm is pre-biased by -topleft,
+ * cm_in additionally by +left, so the final index is the sum). */
+static void pred4x4_tm_vp8_c(uint8_t *src, const uint8_t *topright,
+                             ptrdiff_t stride)
+{
+    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP - src[-1-stride];
+    uint8_t *top = src-stride;
+    int y;
+
+    for (y = 0; y < 4; y++) {
+        const uint8_t *cm_in = cm + src[-1];
+        src[0] = cm_in[top[0]];
+        src[1] = cm_in[top[1]];
+        src[2] = cm_in[top[2]];
+        src[3] = cm_in[top[3]];
+        src += stride;
+    }
+}
+
+/* SVQ3 flavour of 16x16 plane prediction. NOTE(review): the two flag
+ * arguments (1, 0) presumably select svq3 vs rv40 weighting -- confirm
+ * against pred16x16_plane_compat in h264pred_template.c. */
+static void pred16x16_plane_svq3_c(uint8_t *src, ptrdiff_t stride)
+{
+    pred16x16_plane_compat_8_c(src, stride, 1, 0);
+}
+
+/* RV40 flavour of 16x16 plane prediction. NOTE(review): the two flag
+ * arguments (0, 1) presumably select svq3 vs rv40 weighting -- confirm
+ * against pred16x16_plane_compat in h264pred_template.c. */
+static void pred16x16_plane_rv40_c(uint8_t *src, ptrdiff_t stride)
+{
+    pred16x16_plane_compat_8_c(src, stride, 0, 1);
+}
+
+static void pred16x16_tm_vp8_c(uint8_t *src, ptrdiff_t stride)
+{
+ const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP - src[-1-stride];
+ uint8_t *top = src-stride;
+ int y;
+
+ for (y = 0; y < 16; y++) {
+ const uint8_t *cm_in = cm + src[-1];
+ src[0] = cm_in[top[0]];
+ src[1] = cm_in[top[1]];
+ src[2] = cm_in[top[2]];
+ src[3] = cm_in[top[3]];
+ src[4] = cm_in[top[4]];
+ src[5] = cm_in[top[5]];
+ src[6] = cm_in[top[6]];
+ src[7] = cm_in[top[7]];
+ src[8] = cm_in[top[8]];
+ src[9] = cm_in[top[9]];
+ src[10] = cm_in[top[10]];
+ src[11] = cm_in[top[11]];
+ src[12] = cm_in[top[12]];
+ src[13] = cm_in[top[13]];
+ src[14] = cm_in[top[14]];
+ src[15] = cm_in[top[15]];
+ src += stride;
+ }
+}
+
+/* RV40: fill the whole 8x8 block with the rounded average of the 8
+ * left-edge neighbours, written 4 bytes at a time via the 0x01010101
+ * splat constant. */
+static void pred8x8_left_dc_rv40_c(uint8_t *src, ptrdiff_t stride)
+{
+    int i;
+    unsigned dc0;
+
+    dc0=0;
+    for(i=0;i<8; i++)
+        dc0+= src[-1+i*stride];
+    dc0= 0x01010101*((dc0 + 4)>>3);
+
+    for(i=0; i<8; i++){
+        ((uint32_t*)(src+i*stride))[0]=
+        ((uint32_t*)(src+i*stride))[1]= dc0;
+    }
+}
+
+/* RV40: fill the whole 8x8 block with the rounded average of the 8
+ * top-edge neighbours, written 4 bytes at a time via the 0x01010101
+ * splat constant. */
+static void pred8x8_top_dc_rv40_c(uint8_t *src, ptrdiff_t stride)
+{
+    int i;
+    unsigned dc0;
+
+    dc0=0;
+    for(i=0;i<8; i++)
+        dc0+= src[i-stride];
+    dc0= 0x01010101*((dc0 + 4)>>3);
+
+    for(i=0; i<8; i++){
+        ((uint32_t*)(src+i*stride))[0]=
+        ((uint32_t*)(src+i*stride))[1]= dc0;
+    }
+}
+
+/* RV40: a single DC value averaged over all 8 top and 8 left
+ * neighbours fills the entire 8x8 block. */
+static void pred8x8_dc_rv40_c(uint8_t *src, ptrdiff_t stride)
+{
+    int i;
+    unsigned dc0 = 0;
+
+    /* gather 8 top samples (columns 0..3 and 4..7) and 8 left samples
+     * (rows 0..3 and 4..7) */
+    for(i=0;i<4; i++){
+        dc0+= src[-1+i*stride] + src[i-stride];
+        dc0+= src[4+i-stride];
+        dc0+= src[-1+(i+4)*stride];
+    }
+    dc0= 0x01010101*((dc0 + 8)>>4);
+
+    for(i=0; i<4; i++){
+        ((uint32_t*)(src+i*stride))[0]= dc0;
+        ((uint32_t*)(src+i*stride))[1]= dc0;
+    }
+    for(i=4; i<8; i++){
+        ((uint32_t*)(src+i*stride))[0]= dc0;
+        ((uint32_t*)(src+i*stride))[1]= dc0;
+    }
+}
+
+static void pred8x8_tm_vp8_c(uint8_t *src, ptrdiff_t stride)
+{
+ const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP - src[-1-stride];
+ uint8_t *top = src-stride;
+ int y;
+
+ for (y = 0; y < 8; y++) {
+ const uint8_t *cm_in = cm + src[-1];
+ src[0] = cm_in[top[0]];
+ src[1] = cm_in[top[1]];
+ src[2] = cm_in[top[2]];
+ src[3] = cm_in[top[3]];
+ src[4] = cm_in[top[4]];
+ src[5] = cm_in[top[5]];
+ src[6] = cm_in[top[6]];
+ src[7] = cm_in[top[7]];
+ src += stride;
+ }
+}
+
+/**
+ * Set the intra prediction function pointers.
+ */
+av_cold void ff_h264_pred_init(H264PredContext *h, int codec_id,
+ const int bit_depth,
+ int chroma_format_idc)
+{
+#undef FUNC
+#undef FUNCC
+#define FUNC(a, depth) a ## _ ## depth
+#define FUNCC(a, depth) a ## _ ## depth ## _c
+#define FUNCD(a) a ## _c
+
+#define H264_PRED(depth) \
+ if(codec_id != AV_CODEC_ID_RV40){\
+ if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) {\
+ h->pred4x4[VERT_PRED ]= FUNCD(pred4x4_vertical_vp8);\
+ h->pred4x4[HOR_PRED ]= FUNCD(pred4x4_horizontal_vp8);\
+ } else {\
+ h->pred4x4[VERT_PRED ]= FUNCC(pred4x4_vertical , depth);\
+ h->pred4x4[HOR_PRED ]= FUNCC(pred4x4_horizontal , depth);\
+ }\
+ h->pred4x4[DC_PRED ]= FUNCC(pred4x4_dc , depth);\
+ if(codec_id == AV_CODEC_ID_SVQ3)\
+ h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCD(pred4x4_down_left_svq3);\
+ else\
+ h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCC(pred4x4_down_left , depth);\
+ h->pred4x4[DIAG_DOWN_RIGHT_PRED]= FUNCC(pred4x4_down_right , depth);\
+ h->pred4x4[VERT_RIGHT_PRED ]= FUNCC(pred4x4_vertical_right , depth);\
+ h->pred4x4[HOR_DOWN_PRED ]= FUNCC(pred4x4_horizontal_down , depth);\
+ if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) {\
+ h->pred4x4[VERT_LEFT_PRED ]= FUNCD(pred4x4_vertical_left_vp8);\
+ } else\
+ h->pred4x4[VERT_LEFT_PRED ]= FUNCC(pred4x4_vertical_left , depth);\
+ h->pred4x4[HOR_UP_PRED ]= FUNCC(pred4x4_horizontal_up , depth);\
+ if (codec_id != AV_CODEC_ID_VP7 && codec_id != AV_CODEC_ID_VP8) {\
+ h->pred4x4[LEFT_DC_PRED ]= FUNCC(pred4x4_left_dc , depth);\
+ h->pred4x4[TOP_DC_PRED ]= FUNCC(pred4x4_top_dc , depth);\
+ } else {\
+ h->pred4x4[TM_VP8_PRED ]= FUNCD(pred4x4_tm_vp8);\
+ h->pred4x4[DC_127_PRED ]= FUNCC(pred4x4_127_dc , depth);\
+ h->pred4x4[DC_129_PRED ]= FUNCC(pred4x4_129_dc , depth);\
+ h->pred4x4[VERT_VP8_PRED ]= FUNCC(pred4x4_vertical , depth);\
+ h->pred4x4[HOR_VP8_PRED ]= FUNCC(pred4x4_horizontal , depth);\
+ }\
+ if (codec_id != AV_CODEC_ID_VP8)\
+ h->pred4x4[DC_128_PRED ]= FUNCC(pred4x4_128_dc , depth);\
+ }else{\
+ h->pred4x4[VERT_PRED ]= FUNCC(pred4x4_vertical , depth);\
+ h->pred4x4[HOR_PRED ]= FUNCC(pred4x4_horizontal , depth);\
+ h->pred4x4[DC_PRED ]= FUNCC(pred4x4_dc , depth);\
+ h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCD(pred4x4_down_left_rv40);\
+ h->pred4x4[DIAG_DOWN_RIGHT_PRED]= FUNCC(pred4x4_down_right , depth);\
+ h->pred4x4[VERT_RIGHT_PRED ]= FUNCC(pred4x4_vertical_right , depth);\
+ h->pred4x4[HOR_DOWN_PRED ]= FUNCC(pred4x4_horizontal_down , depth);\
+ h->pred4x4[VERT_LEFT_PRED ]= FUNCD(pred4x4_vertical_left_rv40);\
+ h->pred4x4[HOR_UP_PRED ]= FUNCD(pred4x4_horizontal_up_rv40);\
+ h->pred4x4[LEFT_DC_PRED ]= FUNCC(pred4x4_left_dc , depth);\
+ h->pred4x4[TOP_DC_PRED ]= FUNCC(pred4x4_top_dc , depth);\
+ h->pred4x4[DC_128_PRED ]= FUNCC(pred4x4_128_dc , depth);\
+ h->pred4x4[DIAG_DOWN_LEFT_PRED_RV40_NODOWN]= FUNCD(pred4x4_down_left_rv40_nodown);\
+ h->pred4x4[HOR_UP_PRED_RV40_NODOWN]= FUNCD(pred4x4_horizontal_up_rv40_nodown);\
+ h->pred4x4[VERT_LEFT_PRED_RV40_NODOWN]= FUNCD(pred4x4_vertical_left_rv40_nodown);\
+ }\
+\
+ h->pred8x8l[VERT_PRED ]= FUNCC(pred8x8l_vertical , depth);\
+ h->pred8x8l[HOR_PRED ]= FUNCC(pred8x8l_horizontal , depth);\
+ h->pred8x8l[DC_PRED ]= FUNCC(pred8x8l_dc , depth);\
+ h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= FUNCC(pred8x8l_down_left , depth);\
+ h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= FUNCC(pred8x8l_down_right , depth);\
+ h->pred8x8l[VERT_RIGHT_PRED ]= FUNCC(pred8x8l_vertical_right , depth);\
+ h->pred8x8l[HOR_DOWN_PRED ]= FUNCC(pred8x8l_horizontal_down , depth);\
+ h->pred8x8l[VERT_LEFT_PRED ]= FUNCC(pred8x8l_vertical_left , depth);\
+ h->pred8x8l[HOR_UP_PRED ]= FUNCC(pred8x8l_horizontal_up , depth);\
+ h->pred8x8l[LEFT_DC_PRED ]= FUNCC(pred8x8l_left_dc , depth);\
+ h->pred8x8l[TOP_DC_PRED ]= FUNCC(pred8x8l_top_dc , depth);\
+ h->pred8x8l[DC_128_PRED ]= FUNCC(pred8x8l_128_dc , depth);\
+\
+ if (chroma_format_idc <= 1) {\
+ h->pred8x8[VERT_PRED8x8 ]= FUNCC(pred8x8_vertical , depth);\
+ h->pred8x8[HOR_PRED8x8 ]= FUNCC(pred8x8_horizontal , depth);\
+ } else {\
+ h->pred8x8[VERT_PRED8x8 ]= FUNCC(pred8x16_vertical , depth);\
+ h->pred8x8[HOR_PRED8x8 ]= FUNCC(pred8x16_horizontal , depth);\
+ }\
+ if (codec_id != AV_CODEC_ID_VP7 && codec_id != AV_CODEC_ID_VP8) {\
+ if (chroma_format_idc <= 1) {\
+ h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x8_plane , depth);\
+ } else {\
+ h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x16_plane , depth);\
+ }\
+ } else\
+ h->pred8x8[PLANE_PRED8x8]= FUNCD(pred8x8_tm_vp8);\
+ if (codec_id != AV_CODEC_ID_RV40 && codec_id != AV_CODEC_ID_VP7 && \
+ codec_id != AV_CODEC_ID_VP8) {\
+ if (chroma_format_idc <= 1) {\
+ h->pred8x8[DC_PRED8x8 ]= FUNCC(pred8x8_dc , depth);\
+ h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x8_left_dc , depth);\
+ h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x8_top_dc , depth);\
+ h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l0t, depth);\
+ h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\
+ h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\
+ h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\
+ } else {\
+ h->pred8x8[DC_PRED8x8 ]= FUNCC(pred8x16_dc , depth);\
+ h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x16_left_dc , depth);\
+ h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x16_top_dc , depth);\
+ h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_l0t, depth);\
+ h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_0lt, depth);\
+ h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_l00, depth);\
+ h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_0l0, depth);\
+ }\
+ }else{\
+ h->pred8x8[DC_PRED8x8 ]= FUNCD(pred8x8_dc_rv40);\
+ h->pred8x8[LEFT_DC_PRED8x8]= FUNCD(pred8x8_left_dc_rv40);\
+ h->pred8x8[TOP_DC_PRED8x8 ]= FUNCD(pred8x8_top_dc_rv40);\
+ if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) {\
+ h->pred8x8[DC_127_PRED8x8]= FUNCC(pred8x8_127_dc , depth);\
+ h->pred8x8[DC_129_PRED8x8]= FUNCC(pred8x8_129_dc , depth);\
+ }\
+ }\
+ if (chroma_format_idc <= 1) {\
+ h->pred8x8[DC_128_PRED8x8 ]= FUNCC(pred8x8_128_dc , depth);\
+ } else {\
+ h->pred8x8[DC_128_PRED8x8 ]= FUNCC(pred8x16_128_dc , depth);\
+ }\
+\
+ h->pred16x16[DC_PRED8x8 ]= FUNCC(pred16x16_dc , depth);\
+ h->pred16x16[VERT_PRED8x8 ]= FUNCC(pred16x16_vertical , depth);\
+ h->pred16x16[HOR_PRED8x8 ]= FUNCC(pred16x16_horizontal , depth);\
+ switch(codec_id){\
+ case AV_CODEC_ID_SVQ3:\
+ h->pred16x16[PLANE_PRED8x8 ]= FUNCD(pred16x16_plane_svq3);\
+ break;\
+ case AV_CODEC_ID_RV40:\
+ h->pred16x16[PLANE_PRED8x8 ]= FUNCD(pred16x16_plane_rv40);\
+ break;\
+ case AV_CODEC_ID_VP7:\
+ case AV_CODEC_ID_VP8:\
+ h->pred16x16[PLANE_PRED8x8 ]= FUNCD(pred16x16_tm_vp8);\
+ h->pred16x16[DC_127_PRED8x8]= FUNCC(pred16x16_127_dc , depth);\
+ h->pred16x16[DC_129_PRED8x8]= FUNCC(pred16x16_129_dc , depth);\
+ break;\
+ default:\
+ h->pred16x16[PLANE_PRED8x8 ]= FUNCC(pred16x16_plane , depth);\
+ break;\
+ }\
+ h->pred16x16[LEFT_DC_PRED8x8]= FUNCC(pred16x16_left_dc , depth);\
+ h->pred16x16[TOP_DC_PRED8x8 ]= FUNCC(pred16x16_top_dc , depth);\
+ h->pred16x16[DC_128_PRED8x8 ]= FUNCC(pred16x16_128_dc , depth);\
+\
+ /* special lossless h/v prediction for H.264 */ \
+ h->pred4x4_add [VERT_PRED ]= FUNCC(pred4x4_vertical_add , depth);\
+ h->pred4x4_add [ HOR_PRED ]= FUNCC(pred4x4_horizontal_add , depth);\
+ h->pred8x8l_add [VERT_PRED ]= FUNCC(pred8x8l_vertical_add , depth);\
+ h->pred8x8l_add [ HOR_PRED ]= FUNCC(pred8x8l_horizontal_add , depth);\
+ h->pred8x8l_filter_add [VERT_PRED ]= FUNCC(pred8x8l_vertical_filter_add , depth);\
+ h->pred8x8l_filter_add [ HOR_PRED ]= FUNCC(pred8x8l_horizontal_filter_add , depth);\
+ if (chroma_format_idc <= 1) {\
+ h->pred8x8_add [VERT_PRED8x8]= FUNCC(pred8x8_vertical_add , depth);\
+ h->pred8x8_add [ HOR_PRED8x8]= FUNCC(pred8x8_horizontal_add , depth);\
+ } else {\
+ h->pred8x8_add [VERT_PRED8x8]= FUNCC(pred8x16_vertical_add , depth);\
+ h->pred8x8_add [ HOR_PRED8x8]= FUNCC(pred8x16_horizontal_add , depth);\
+ }\
+ h->pred16x16_add[VERT_PRED8x8]= FUNCC(pred16x16_vertical_add , depth);\
+ h->pred16x16_add[ HOR_PRED8x8]= FUNCC(pred16x16_horizontal_add , depth);\
+
+ switch (bit_depth) {
+ case 9:
+ H264_PRED(9)
+ break;
+ case 10:
+ H264_PRED(10)
+ break;
+ case 12:
+ H264_PRED(12)
+ break;
+ case 14:
+ H264_PRED(14)
+ break;
+ default:
+ av_assert0(bit_depth<=8);
+ H264_PRED(8)
+ break;
+ }
+
+ if (ARCH_AARCH64)
+ ff_h264_pred_init_aarch64(h, codec_id, bit_depth, chroma_format_idc);
+ if (ARCH_ARM)
+ ff_h264_pred_init_arm(h, codec_id, bit_depth, chroma_format_idc);
+ if (ARCH_X86)
+ ff_h264_pred_init_x86(h, codec_id, bit_depth, chroma_format_idc);
+ if (ARCH_MIPS)
+ ff_h264_pred_init_mips(h, codec_id, bit_depth, chroma_format_idc);
+}
diff --git a/media/ffvpx/libavcodec/h264pred.h b/media/ffvpx/libavcodec/h264pred.h
new file mode 100644
index 000000000..2863dc9bd
--- /dev/null
+++ b/media/ffvpx/libavcodec/h264pred.h
@@ -0,0 +1,126 @@
+/*
+ * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
+ * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * H.264 / AVC / MPEG-4 prediction functions.
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#ifndef AVCODEC_H264PRED_H
+#define AVCODEC_H264PRED_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+/**
+ * Prediction types
+ */
+//@{
+#define VERT_PRED 0
+#define HOR_PRED 1
+#define DC_PRED 2
+#define DIAG_DOWN_LEFT_PRED 3
+#define DIAG_DOWN_RIGHT_PRED 4
+#define VERT_RIGHT_PRED 5
+#define HOR_DOWN_PRED 6
+#define VERT_LEFT_PRED 7
+#define HOR_UP_PRED 8
+
+// DC edge (not for VP8)
+#define LEFT_DC_PRED 9
+#define TOP_DC_PRED 10
+#define DC_128_PRED 11
+
+// RV40 specific
+#define DIAG_DOWN_LEFT_PRED_RV40_NODOWN 12
+#define HOR_UP_PRED_RV40_NODOWN 13
+#define VERT_LEFT_PRED_RV40_NODOWN 14
+
+// VP8 specific
+#define TM_VP8_PRED 9 ///< "True Motion", used instead of plane
+#define VERT_VP8_PRED 10 ///< for VP8, #VERT_PRED is the average of
+ ///< (left col+cur col x2+right col) / 4;
+ ///< this is the "unaveraged" one
+#define HOR_VP8_PRED 14 ///< unaveraged version of #HOR_PRED, see
+ ///< #VERT_VP8_PRED for details
+#define DC_127_PRED 12
+#define DC_129_PRED 13
+
+#define DC_PRED8x8 0
+#define HOR_PRED8x8 1
+#define VERT_PRED8x8 2
+#define PLANE_PRED8x8 3
+
+// DC edge
+#define LEFT_DC_PRED8x8 4
+#define TOP_DC_PRED8x8 5
+#define DC_128_PRED8x8 6
+
+// H.264/SVQ3 (8x8) specific
+#define ALZHEIMER_DC_L0T_PRED8x8 7
+#define ALZHEIMER_DC_0LT_PRED8x8 8
+#define ALZHEIMER_DC_L00_PRED8x8 9
+#define ALZHEIMER_DC_0L0_PRED8x8 10
+
+// VP8 specific
+#define DC_127_PRED8x8 7
+#define DC_129_PRED8x8 8
+//@}
+
+/**
+ * Context for storing H.264 prediction functions
+ */
+typedef struct H264PredContext { // tables of intra-prediction functions, indexed by prediction mode; filled by ff_h264_pred_init()
+    void(*pred4x4[9 + 3 + 3])(uint8_t *src, const uint8_t *topright, // 4x4 luma: 9 H.264 modes + 3 DC-edge + 3 VP8/RV40 extras (see mode defines above)
+                              ptrdiff_t stride);
+    void(*pred8x8l[9 + 3])(uint8_t *src, int topleft, int topright, // 8x8 luma (H.264 8x8 transform); topleft/topright flag edge availability
+                           ptrdiff_t stride);
+    void(*pred8x8[4 + 3 + 4])(uint8_t *src, ptrdiff_t stride); // 8x8 chroma (or 8x16 for 4:2:2): 4 base + 3 DC-edge + 4 SVQ3/VP8 modes
+    void(*pred16x16[4 + 3 + 2])(uint8_t *src, ptrdiff_t stride); // 16x16 luma: 4 base + 3 DC-edge + 2 VP8 modes
+
+    void(*pred4x4_add[2])(uint8_t *pix /*align 4*/, // predict-and-add-residual variants (lossless h/v prediction); [0]=vertical, [1]=horizontal
+                          int16_t *block /*align 16*/, ptrdiff_t stride);
+    void(*pred8x8l_add[2])(uint8_t *pix /*align 8*/,
+                           int16_t *block /*align 16*/, ptrdiff_t stride);
+    void(*pred8x8l_filter_add[2])(uint8_t *pix /*align 8*/,
+                           int16_t *block /*align 16*/, int topleft, int topright, ptrdiff_t stride);
+    void(*pred8x8_add[3])(uint8_t *pix /*align 8*/, // block_offset: per-sub-block byte offsets into pix
+                          const int *block_offset,
+                          int16_t *block /*align 16*/, ptrdiff_t stride);
+    void(*pred16x16_add[3])(uint8_t *pix /*align 16*/,
+                            const int *block_offset,
+                            int16_t *block /*align 16*/, ptrdiff_t stride);
+} H264PredContext;
+
+void ff_h264_pred_init(H264PredContext *h, int codec_id,
+ const int bit_depth, const int chroma_format_idc);
+void ff_h264_pred_init_aarch64(H264PredContext *h, int codec_id,
+ const int bit_depth,
+ const int chroma_format_idc);
+void ff_h264_pred_init_arm(H264PredContext *h, int codec_id,
+ const int bit_depth, const int chroma_format_idc);
+void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
+ const int bit_depth, const int chroma_format_idc);
+void ff_h264_pred_init_mips(H264PredContext *h, int codec_id,
+ const int bit_depth, const int chroma_format_idc);
+
+#endif /* AVCODEC_H264PRED_H */
diff --git a/media/ffvpx/libavcodec/h264pred_template.c b/media/ffvpx/libavcodec/h264pred_template.c
new file mode 100644
index 000000000..2b30fff70
--- /dev/null
+++ b/media/ffvpx/libavcodec/h264pred_template.c
@@ -0,0 +1,1355 @@
+/*
+ * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
+ * Copyright (c) 2003-2011 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * H.264 / AVC / MPEG-4 part10 prediction functions.
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include "libavutil/intreadwrite.h"
+
+#include "mathops.h"
+
+#include "bit_depth_template.c"
+
+static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright, // 4x4 vertical: replicate the row above into all 4 rows
+                                    ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1); // byte stride -> pixel stride (shift 1 for 16-bit pixels, 0 for 8-bit)
+    const pixel4 a= AV_RN4PA(src-stride); // the 4 neighbour pixels directly above the block
+
+    AV_WN4PA(src+0*stride, a);
+    AV_WN4PA(src+1*stride, a);
+    AV_WN4PA(src+2*stride, a);
+    AV_WN4PA(src+3*stride, a);
+}
+
+static void FUNCC(pred4x4_horizontal)(uint8_t *_src, const uint8_t *topright, // 4x4 horizontal: splat each left-neighbour pixel across its row
+                                      ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1); // byte stride -> pixel stride
+    AV_WN4PA(src+0*stride, PIXEL_SPLAT_X4(src[-1+0*stride]));
+    AV_WN4PA(src+1*stride, PIXEL_SPLAT_X4(src[-1+1*stride]));
+    AV_WN4PA(src+2*stride, PIXEL_SPLAT_X4(src[-1+2*stride]));
+    AV_WN4PA(src+3*stride, PIXEL_SPLAT_X4(src[-1+3*stride]));
+}
+
+static void FUNCC(pred4x4_dc)(uint8_t *_src, const uint8_t *topright, // 4x4 DC: fill with rounded mean of the 4 top + 4 left neighbours
+                              ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1); // byte stride -> pixel stride
+    const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
+                   + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3; // (sum of 8 + 4) >> 3 rounds to nearest
+    const pixel4 a = PIXEL_SPLAT_X4(dc);
+
+    AV_WN4PA(src+0*stride, a);
+    AV_WN4PA(src+1*stride, a);
+    AV_WN4PA(src+2*stride, a);
+    AV_WN4PA(src+3*stride, a);
+}
+
+static void FUNCC(pred4x4_left_dc)(uint8_t *_src, const uint8_t *topright, // 4x4 DC using only the left edge (top unavailable)
+                                   ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1); // byte stride -> pixel stride
+    const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2; // rounded mean of 4 left pixels
+    const pixel4 a = PIXEL_SPLAT_X4(dc);
+
+    AV_WN4PA(src+0*stride, a);
+    AV_WN4PA(src+1*stride, a);
+    AV_WN4PA(src+2*stride, a);
+    AV_WN4PA(src+3*stride, a);
+}
+
+static void FUNCC(pred4x4_top_dc)(uint8_t *_src, const uint8_t *topright, // 4x4 DC using only the top edge (left unavailable)
+                                  ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1); // byte stride -> pixel stride
+    const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2; // rounded mean of 4 top pixels
+    const pixel4 a = PIXEL_SPLAT_X4(dc);
+
+    AV_WN4PA(src+0*stride, a);
+    AV_WN4PA(src+1*stride, a);
+    AV_WN4PA(src+2*stride, a);
+    AV_WN4PA(src+3*stride, a);
+}
+
+static void FUNCC(pred4x4_128_dc)(uint8_t *_src, const uint8_t *topright, // 4x4 DC with no edges available: fill with mid-grey
+                                  ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1); // byte stride -> pixel stride
+    const pixel4 a = PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1)); // 128 at 8-bit, scaled for higher depths
+
+    AV_WN4PA(src+0*stride, a);
+    AV_WN4PA(src+1*stride, a);
+    AV_WN4PA(src+2*stride, a);
+    AV_WN4PA(src+3*stride, a);
+}
+
+static void FUNCC(pred4x4_127_dc)(uint8_t *_src, const uint8_t *topright, // fill with mid-grey minus one (DC_127_PRED, a VP8 mode per h264pred.h)
+                                  ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1); // byte stride -> pixel stride
+    const pixel4 a = PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))-1); // 127 at 8-bit
+
+    AV_WN4PA(src+0*stride, a);
+    AV_WN4PA(src+1*stride, a);
+    AV_WN4PA(src+2*stride, a);
+    AV_WN4PA(src+3*stride, a);
+}
+
+static void FUNCC(pred4x4_129_dc)(uint8_t *_src, const uint8_t *topright, // fill with mid-grey plus one (DC_129_PRED, a VP8 mode per h264pred.h)
+                                  ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1); // byte stride -> pixel stride
+    const pixel4 a = PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))+1); // 129 at 8-bit
+
+    AV_WN4PA(src+0*stride, a);
+    AV_WN4PA(src+1*stride, a);
+    AV_WN4PA(src+2*stride, a);
+    AV_WN4PA(src+3*stride, a);
+}
+
+
+#define LOAD_TOP_RIGHT_EDGE\
+ const unsigned av_unused t4 = topright[0];\
+ const unsigned av_unused t5 = topright[1];\
+ const unsigned av_unused t6 = topright[2];\
+ const unsigned av_unused t7 = topright[3];\
+
+#define LOAD_DOWN_LEFT_EDGE\
+ const unsigned av_unused l4 = src[-1+4*stride];\
+ const unsigned av_unused l5 = src[-1+5*stride];\
+ const unsigned av_unused l6 = src[-1+6*stride];\
+ const unsigned av_unused l7 = src[-1+7*stride];\
+
+#define LOAD_LEFT_EDGE\
+ const unsigned av_unused l0 = src[-1+0*stride];\
+ const unsigned av_unused l1 = src[-1+1*stride];\
+ const unsigned av_unused l2 = src[-1+2*stride];\
+ const unsigned av_unused l3 = src[-1+3*stride];\
+
+#define LOAD_TOP_EDGE\
+ const unsigned av_unused t0 = src[ 0-1*stride];\
+ const unsigned av_unused t1 = src[ 1-1*stride];\
+ const unsigned av_unused t2 = src[ 2-1*stride];\
+ const unsigned av_unused t3 = src[ 3-1*stride];\
+
+static void FUNCC(pred4x4_down_right)(uint8_t *_src, const uint8_t *topright, // 4x4 diagonal-down-right: each anti-diagonal gets one (a+2b+c+2)>>2 filtered value
+                                      ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1); // byte stride -> pixel stride
+    const int lt= src[-1-1*stride]; // top-left corner neighbour
+    LOAD_TOP_EDGE
+    LOAD_LEFT_EDGE
+
+    src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
+    src[0+2*stride]=
+    src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
+    src[0+1*stride]=
+    src[1+2*stride]=
+    src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
+    src[0+0*stride]=
+    src[1+1*stride]=
+    src[2+2*stride]=
+    src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2; // main diagonal crosses the corner
+    src[1+0*stride]=
+    src[2+1*stride]=
+    src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
+    src[2+0*stride]=
+    src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
+    src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
+}
+
+static void FUNCC(pred4x4_down_left)(uint8_t *_src, const uint8_t *_topright, // 4x4 diagonal-down-left: filtered top + top-right samples along diagonals
+                                     ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    const pixel *topright = (const pixel*)_topright; // 4 pixels above-right of the block (t4..t7)
+    int stride = _stride>>(sizeof(pixel)-1); // byte stride -> pixel stride
+    LOAD_TOP_EDGE
+    LOAD_TOP_RIGHT_EDGE
+// LOAD_LEFT_EDGE
+
+    src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
+    src[1+0*stride]=
+    src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
+    src[2+0*stride]=
+    src[1+1*stride]=
+    src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
+    src[3+0*stride]=
+    src[2+1*stride]=
+    src[1+2*stride]=
+    src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
+    src[3+1*stride]=
+    src[2+2*stride]=
+    src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
+    src[3+2*stride]=
+    src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
+    src[3+3*stride]=(t6 + 3*t7 + 2)>>2; // last corner: t7 replicated past the edge
+}
+
+static void FUNCC(pred4x4_vertical_right)(uint8_t *_src, // 4x4 vertical-right: half-pel rows use 2-tap averages, others 3-tap filtered values
+                                          const uint8_t *topright,
+                                          ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1); // byte stride -> pixel stride
+    const int lt= src[-1-1*stride]; // top-left corner neighbour
+    LOAD_TOP_EDGE
+    LOAD_LEFT_EDGE
+
+    src[0+0*stride]=
+    src[1+2*stride]=(lt + t0 + 1)>>1;
+    src[1+0*stride]=
+    src[2+2*stride]=(t0 + t1 + 1)>>1;
+    src[2+0*stride]=
+    src[3+2*stride]=(t1 + t2 + 1)>>1;
+    src[3+0*stride]=(t2 + t3 + 1)>>1;
+    src[0+1*stride]=
+    src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
+    src[1+1*stride]=
+    src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
+    src[2+1*stride]=
+    src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
+    src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
+    src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
+    src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
+}
+
+static void FUNCC(pred4x4_vertical_left)(uint8_t *_src, // 4x4 vertical-left: even rows are 2-tap, odd rows 3-tap filters of the top edge
+                                         const uint8_t *_topright,
+                                         ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    const pixel *topright = (const pixel*)_topright; // pixels above-right of the block (t4..t7)
+    int stride = _stride>>(sizeof(pixel)-1); // byte stride -> pixel stride
+    LOAD_TOP_EDGE
+    LOAD_TOP_RIGHT_EDGE
+
+    src[0+0*stride]=(t0 + t1 + 1)>>1;
+    src[1+0*stride]=
+    src[0+2*stride]=(t1 + t2 + 1)>>1;
+    src[2+0*stride]=
+    src[1+2*stride]=(t2 + t3 + 1)>>1;
+    src[3+0*stride]=
+    src[2+2*stride]=(t3 + t4+ 1)>>1;
+    src[3+2*stride]=(t4 + t5+ 1)>>1;
+    src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
+    src[1+1*stride]=
+    src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
+    src[2+1*stride]=
+    src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
+    src[3+1*stride]=
+    src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
+    src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
+}
+
+static void FUNCC(pred4x4_horizontal_up)(uint8_t *_src, const uint8_t *topright, // 4x4 horizontal-up: interpolate down the left edge, bottom-right flattens to l3
+                                         ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1); // byte stride -> pixel stride
+    LOAD_LEFT_EDGE
+
+    src[0+0*stride]=(l0 + l1 + 1)>>1;
+    src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
+    src[2+0*stride]=
+    src[0+1*stride]=(l1 + l2 + 1)>>1;
+    src[3+0*stride]=
+    src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
+    src[2+1*stride]=
+    src[0+2*stride]=(l2 + l3 + 1)>>1;
+    src[3+1*stride]=
+    src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2; // l3 doubled: samples below the edge are replicated
+    src[3+2*stride]=
+    src[1+3*stride]=
+    src[0+3*stride]=
+    src[2+2*stride]=
+    src[2+3*stride]=
+    src[3+3*stride]=l3; // remaining positions all take the bottom-left neighbour
+}
+
+static void FUNCC(pred4x4_horizontal_down)(uint8_t *_src, // 4x4 horizontal-down: mix of 2-tap and 3-tap filters of left, corner and top samples
+                                           const uint8_t *topright,
+                                           ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1); // byte stride -> pixel stride
+    const int lt= src[-1-1*stride]; // top-left corner neighbour
+    LOAD_TOP_EDGE
+    LOAD_LEFT_EDGE
+
+    src[0+0*stride]=
+    src[2+1*stride]=(lt + l0 + 1)>>1;
+    src[1+0*stride]=
+    src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
+    src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
+    src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
+    src[0+1*stride]=
+    src[2+2*stride]=(l0 + l1 + 1)>>1;
+    src[1+1*stride]=
+    src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
+    src[0+2*stride]=
+    src[2+3*stride]=(l1 + l2+ 1)>>1;
+    src[1+2*stride]=
+    src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
+    src[0+3*stride]=(l2 + l3 + 1)>>1;
+    src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
+}
+
+static void FUNCC(pred16x16_vertical)(uint8_t *_src, ptrdiff_t _stride) // 16x16 vertical: copy the 16 pixels above into every row
+{
+    int i;
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1); // byte stride -> pixel stride
+    const pixel4 a = AV_RN4PA(((pixel4*)(src-stride))+0); // top row read as four 4-pixel groups
+    const pixel4 b = AV_RN4PA(((pixel4*)(src-stride))+1);
+    const pixel4 c = AV_RN4PA(((pixel4*)(src-stride))+2);
+    const pixel4 d = AV_RN4PA(((pixel4*)(src-stride))+3);
+
+    for(i=0; i<16; i++){
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
+        AV_WN4PA(((pixel4*)(src+i*stride))+2, c);
+        AV_WN4PA(((pixel4*)(src+i*stride))+3, d);
+    }
+}
+
+static void FUNCC(pred16x16_horizontal)(uint8_t *_src, ptrdiff_t stride) // 16x16 horizontal: splat each left-neighbour pixel across its row
+{
+    int i;
+    pixel *src = (pixel*)_src;
+    stride >>= sizeof(pixel)-1; // byte stride -> pixel stride
+
+    for(i=0; i<16; i++){
+        const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
+
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
+        AV_WN4PA(((pixel4*)(src+i*stride))+2, a);
+        AV_WN4PA(((pixel4*)(src+i*stride))+3, a);
+    }
+}
+
+#define PREDICT_16x16_DC(v)\
+ for(i=0; i<16; i++){\
+ AV_WN4PA(src+ 0, v);\
+ AV_WN4PA(src+ 4, v);\
+ AV_WN4PA(src+ 8, v);\
+ AV_WN4PA(src+12, v);\
+ src += stride;\
+ }
+
+static void FUNCC(pred16x16_dc)(uint8_t *_src, ptrdiff_t stride) // 16x16 DC: fill with rounded mean of the 16 left + 16 top neighbours
+{
+    int i, dc=0;
+    pixel *src = (pixel*)_src;
+    pixel4 dcsplat;
+    stride >>= sizeof(pixel)-1; // byte stride -> pixel stride
+
+    for(i=0;i<16; i++){
+        dc+= src[-1+i*stride]; // left column
+    }
+
+    for(i=0;i<16; i++){
+        dc+= src[i-stride]; // top row
+    }
+
+    dcsplat = PIXEL_SPLAT_X4((dc+16)>>5); // 32 samples: round and divide by 32
+    PREDICT_16x16_DC(dcsplat);
+}
+
+static void FUNCC(pred16x16_left_dc)(uint8_t *_src, ptrdiff_t stride) // 16x16 DC using only the left edge (top unavailable)
+{
+    int i, dc=0;
+    pixel *src = (pixel*)_src;
+    pixel4 dcsplat;
+    stride >>= sizeof(pixel)-1; // byte stride -> pixel stride
+
+    for(i=0;i<16; i++){
+        dc+= src[-1+i*stride];
+    }
+
+    dcsplat = PIXEL_SPLAT_X4((dc+8)>>4); // 16 samples: round and divide by 16
+    PREDICT_16x16_DC(dcsplat);
+}
+
+static void FUNCC(pred16x16_top_dc)(uint8_t *_src, ptrdiff_t stride) // 16x16 DC using only the top edge (left unavailable)
+{
+    int i, dc=0;
+    pixel *src = (pixel*)_src;
+    pixel4 dcsplat;
+    stride >>= sizeof(pixel)-1; // byte stride -> pixel stride
+
+    for(i=0;i<16; i++){
+        dc+= src[i-stride];
+    }
+
+    dcsplat = PIXEL_SPLAT_X4((dc+8)>>4); // 16 samples: round and divide by 16
+    PREDICT_16x16_DC(dcsplat);
+}
+
+#define PRED16x16_X(n, v) \
+static void FUNCC(pred16x16_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\
+{\
+ int i;\
+ pixel *src = (pixel*)_src;\
+ stride >>= sizeof(pixel)-1;\
+ PREDICT_16x16_DC(PIXEL_SPLAT_X4(v));\
+}
+
+PRED16x16_X(127, (1<<(BIT_DEPTH-1))-1)
+PRED16x16_X(128, (1<<(BIT_DEPTH-1))+0)
+PRED16x16_X(129, (1<<(BIT_DEPTH-1))+1)
+
+static inline void FUNCC(pred16x16_plane_compat)(uint8_t *_src, // 16x16 plane prediction; svq3/rv40 flags select codec-specific gradient scaling
+                                                 ptrdiff_t _stride,
+                                                 const int svq3,
+                                                 const int rv40)
+{
+    int i, j, k;
+    int a;
+    INIT_CLIP
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1); // byte stride -> pixel stride
+    const pixel * const src0 = src +7-stride; // centre of the top edge
+    const pixel * src1 = src +8*stride-1; // walks down the left edge
+    const pixel * src2 = src1-2*stride;   // == src+6*stride-1; walks up the left edge
+    int H = src0[1] - src0[-1]; // horizontal gradient accumulated over the top edge
+    int V = src1[0] - src2[ 0]; // vertical gradient accumulated over the left edge
+    for(k=2; k<=8; ++k) {
+        src1 += stride; src2 -= stride;
+        H += k*(src0[k] - src0[-k]); // samples further from the centre get larger weights
+        V += k*(src1[0] - src2[ 0]);
+    }
+    if(svq3){
+        H = ( 5*(H/4) ) / 16; // SVQ3 uses truncating division instead of the H.264 shift
+        V = ( 5*(V/4) ) / 16;
+
+        /* required for 100% accuracy */
+        i = H; H = V; V = i; // SVQ3 swaps the two gradients
+    }else if(rv40){
+        H = ( H + (H>>2) ) >> 4; // RV40 scaling variant
+        V = ( V + (V>>2) ) >> 4;
+    }else{
+        H = ( 5*H+32 ) >> 6; // standard H.264 plane scaling
+        V = ( 5*V+32 ) >> 6;
+    }
+
+    a = 16*(src1[0] + src2[16] + 1) - 7*(V+H); // plane offset from the two far corners
+    for(j=16; j>0; --j) {
+        int b = a;
+        a += V; // advance one row down the vertical gradient
+        for(i=-16; i<0; i+=4) {
+            src[16+i] = CLIP((b    ) >> 5); // clip each predicted sample to the pixel range
+            src[17+i] = CLIP((b+  H) >> 5);
+            src[18+i] = CLIP((b+2*H) >> 5);
+            src[19+i] = CLIP((b+3*H) >> 5);
+            b += 4*H;
+        }
+        src += stride;
+    }
+}
+
+static void FUNCC(pred16x16_plane)(uint8_t *src, ptrdiff_t stride) // standard H.264 16x16 plane mode (no SVQ3/RV40 quirks)
+{
+    FUNCC(pred16x16_plane_compat)(src, stride, 0, 0);
+}
+
+static void FUNCC(pred8x8_vertical)(uint8_t *_src, ptrdiff_t _stride) // 8x8 vertical: copy the 8 pixels above into all 8 rows
+{
+    int i;
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1); // byte stride -> pixel stride
+    const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0);
+    const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1);
+
+    for(i=0; i<8; i++){
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
+    }
+}
+
+static void FUNCC(pred8x16_vertical)(uint8_t *_src, ptrdiff_t _stride) // 8x16 vertical (4:2:2 chroma): copy the top row into all 16 rows
+{
+    int i;
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1); // byte stride -> pixel stride
+    const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0);
+    const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1);
+
+    for(i=0; i<16; i++){
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
+    }
+}
+
+static void FUNCC(pred8x8_horizontal)(uint8_t *_src, ptrdiff_t stride) // 8x8 horizontal: splat each left-neighbour pixel across its row
+{
+    int i;
+    pixel *src = (pixel*)_src;
+    stride >>= sizeof(pixel)-1; // byte stride -> pixel stride
+
+    for(i=0; i<8; i++){
+        const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
+    }
+}
+
+static void FUNCC(pred8x16_horizontal)(uint8_t *_src, ptrdiff_t stride) // 8x16 horizontal (4:2:2 chroma) variant of pred8x8_horizontal
+{
+    int i;
+    pixel *src = (pixel*)_src;
+    stride >>= sizeof(pixel)-1; // byte stride -> pixel stride
+    for(i=0; i<16; i++){
+        const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
+    }
+}
+
+#define PRED8x8_X(n, v)\
+static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\
+{\
+ int i;\
+ const pixel4 a = PIXEL_SPLAT_X4(v);\
+ pixel *src = (pixel*)_src;\
+ stride >>= sizeof(pixel)-1;\
+ for(i=0; i<8; i++){\
+ AV_WN4PA(((pixel4*)(src+i*stride))+0, a);\
+ AV_WN4PA(((pixel4*)(src+i*stride))+1, a);\
+ }\
+}
+
+PRED8x8_X(127, (1<<(BIT_DEPTH-1))-1)
+PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0)
+PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1)
+
+static void FUNCC(pred8x16_128_dc)(uint8_t *_src, ptrdiff_t stride) // 8x16 mid-grey fill: two stacked 8x8 fills (stride is still in bytes here)
+{
+    FUNCC(pred8x8_128_dc)(_src,            stride);
+    FUNCC(pred8x8_128_dc)(_src+8*stride,   stride); // lower half starts 8 rows down
+}
+
+static void FUNCC(pred8x8_left_dc)(uint8_t *_src, ptrdiff_t stride) // 8x8 DC from left edge only: separate DC for top and bottom 4-row halves
+{
+    int i;
+    int dc0, dc2;
+    pixel4 dc0splat, dc2splat;
+    pixel *src = (pixel*)_src;
+    stride >>= sizeof(pixel)-1; // byte stride -> pixel stride
+
+    dc0=dc2=0;
+    for(i=0;i<4; i++){
+        dc0+= src[-1+i*stride];     // upper 4 left neighbours
+        dc2+= src[-1+(i+4)*stride]; // lower 4 left neighbours
+    }
+    dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
+    dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
+
+    for(i=0; i<4; i++){
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc0splat);
+    }
+    for(i=4; i<8; i++){
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc2splat);
+    }
+}
+
+static void FUNCC(pred8x16_left_dc)(uint8_t *_src, ptrdiff_t stride) // 8x16 left-DC: two stacked 8x8 left-DC predictions
+{
+    FUNCC(pred8x8_left_dc)(_src,            stride);
+    FUNCC(pred8x8_left_dc)(_src+8*stride,   stride); // lower half starts 8 rows down (stride in bytes)
+}
+
+static void FUNCC(pred8x8_top_dc)(uint8_t *_src, ptrdiff_t stride) // 8x8 DC from top edge only: separate DC for left and right 4-column halves
+{
+    int i;
+    int dc0, dc1;
+    pixel4 dc0splat, dc1splat;
+    pixel *src = (pixel*)_src;
+    stride >>= sizeof(pixel)-1; // byte stride -> pixel stride
+
+    dc0=dc1=0;
+    for(i=0;i<4; i++){
+        dc0+= src[i-stride];   // left 4 top neighbours
+        dc1+= src[4+i-stride]; // right 4 top neighbours
+    }
+    dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
+    dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
+
+    for(i=0; i<4; i++){
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
+    }
+    for(i=4; i<8; i++){
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
+    }
+}
+
+static void FUNCC(pred8x16_top_dc)(uint8_t *_src, ptrdiff_t stride) // 8x16 DC from top edge only: per-half column DCs extended over all 16 rows
+{
+    int i;
+    int dc0, dc1;
+    pixel4 dc0splat, dc1splat;
+    pixel *src = (pixel*)_src;
+    stride >>= sizeof(pixel)-1; // byte stride -> pixel stride
+
+    dc0=dc1=0;
+    for(i=0;i<4; i++){
+        dc0+= src[i-stride];   // left 4 top neighbours
+        dc1+= src[4+i-stride]; // right 4 top neighbours
+    }
+    dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
+    dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
+
+    for(i=0; i<16; i++){
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
+    }
+}
+
+static void FUNCC(pred8x8_dc)(uint8_t *_src, ptrdiff_t stride) // 8x8 DC: one DC per 4x4 quadrant from the adjacent edge samples
+{
+    int i;
+    int dc0, dc1, dc2;
+    pixel4 dc0splat, dc1splat, dc2splat, dc3splat;
+    pixel *src = (pixel*)_src;
+    stride >>= sizeof(pixel)-1; // byte stride -> pixel stride
+
+    dc0=dc1=dc2=0;
+    for(i=0;i<4; i++){
+        dc0+= src[-1+i*stride] + src[i-stride]; // top-left quadrant: 4 left + 4 top samples
+        dc1+= src[4+i-stride];                  // top-right quadrant: top samples only
+        dc2+= src[-1+(i+4)*stride];             // bottom-left quadrant: left samples only
+    }
+    dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
+    dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
+    dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
+    dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3); // bottom-right: mean of the two adjacent edge DCs
+
+    for(i=0; i<4; i++){
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
+    }
+    for(i=4; i<8; i++){
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat);
+    }
+}
+
+static void FUNCC(pred8x16_dc)(uint8_t *_src, ptrdiff_t stride) // 8x16 DC (4:2:2): one DC per 4x4 quadrant, left edge split into four 4-row groups
+{
+    int i;
+    int dc0, dc1, dc2, dc3, dc4;
+    pixel4 dc0splat, dc1splat, dc2splat, dc3splat, dc4splat, dc5splat, dc6splat, dc7splat;
+    pixel *src = (pixel*)_src;
+    stride >>= sizeof(pixel)-1; // byte stride -> pixel stride
+
+    dc0=dc1=dc2=dc3=dc4=0;
+    for(i=0;i<4; i++){
+        dc0+= src[-1+i*stride] + src[i-stride]; // top-left quadrant: left + top samples
+        dc1+= src[4+i-stride];                  // right top-edge samples
+        dc2+= src[-1+(i+4)*stride];             // left samples, rows 4-7
+        dc3+= src[-1+(i+8)*stride];             // left samples, rows 8-11
+        dc4+= src[-1+(i+12)*stride];            // left samples, rows 12-15
+    }
+    dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
+    dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
+    dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
+    dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3); // right quadrants below the top mix the top-edge DC with the row-group DC
+    dc4splat = PIXEL_SPLAT_X4((dc3 + 2)>>2);
+    dc5splat = PIXEL_SPLAT_X4((dc1 + dc3 + 4)>>3);
+    dc6splat = PIXEL_SPLAT_X4((dc4 + 2)>>2);
+    dc7splat = PIXEL_SPLAT_X4((dc1 + dc4 + 4)>>3);
+
+    for(i=0; i<4; i++){
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
+    }
+    for(i=4; i<8; i++){
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat);
+    }
+    for(i=8; i<12; i++){
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc4splat);
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc5splat);
+    }
+    for(i=12; i<16; i++){
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc6splat);
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc7splat);
+    }
+}
+
+//the following functions should not be optimized!
+static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, ptrdiff_t stride) // ALZHEIMER_DC_L0T_PRED8x8: top-DC for the 8x8, then 4x4 DC over the first sub-block
+{
+    FUNCC(pred8x8_top_dc)(src, stride);
+    FUNCC(pred4x4_dc)(src, NULL, stride); // topright unused by pred4x4_dc, hence NULL
+}
+
+static void FUNC(pred8x16_mad_cow_dc_l0t)(uint8_t *src, ptrdiff_t stride) // 8x16 variant of pred8x8_mad_cow_dc_l0t
+{
+    FUNCC(pred8x16_top_dc)(src, stride);
+    FUNCC(pred4x4_dc)(src, NULL, stride);
+}
+
+static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, ptrdiff_t stride) // ALZHEIMER_DC_0LT_PRED8x8: full 8x8 DC, then top-DC over the first 4x4 sub-block
+{
+    FUNCC(pred8x8_dc)(src, stride);
+    FUNCC(pred4x4_top_dc)(src, NULL, stride);
+}
+
+static void FUNC(pred8x16_mad_cow_dc_0lt)(uint8_t *src, ptrdiff_t stride) // 8x16 variant of pred8x8_mad_cow_dc_0lt
+{
+    FUNCC(pred8x16_dc)(src, stride);
+    FUNCC(pred4x4_top_dc)(src, NULL, stride);
+}
+
+static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, ptrdiff_t stride) // ALZHEIMER_DC_L00_PRED8x8: left-DC, then mid-grey in the bottom two 4x4 sub-blocks
+{
+    FUNCC(pred8x8_left_dc)(src, stride);
+    FUNCC(pred4x4_128_dc)(src + 4*stride                  , NULL, stride); // bottom-left 4x4 (stride in bytes here)
+    FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride); // bottom-right 4x4
+}
+
+static void FUNC(pred8x16_mad_cow_dc_l00)(uint8_t *src, ptrdiff_t stride) // 8x16 variant of pred8x8_mad_cow_dc_l00
+{
+    FUNCC(pred8x16_left_dc)(src, stride);
+    FUNCC(pred4x4_128_dc)(src + 4*stride                  , NULL, stride);
+    FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
+}
+
+static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, ptrdiff_t stride) // ALZHEIMER_DC_0L0_PRED8x8: left-DC, then mid-grey in the top two 4x4 sub-blocks
+{
+    FUNCC(pred8x8_left_dc)(src, stride);
+    FUNCC(pred4x4_128_dc)(src                  , NULL, stride); // top-left 4x4
+    FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride); // top-right 4x4
+}
+
+static void FUNC(pred8x16_mad_cow_dc_0l0)(uint8_t *src, ptrdiff_t stride) // 8x16 variant of pred8x8_mad_cow_dc_0l0
+{
+    FUNCC(pred8x16_left_dc)(src, stride);
+    FUNCC(pred4x4_128_dc)(src                  , NULL, stride);
+    FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
+}
+
+static void FUNCC(pred8x8_plane)(uint8_t *_src, ptrdiff_t _stride) // 8x8 plane: linear gradient fitted to top and left edges, clipped per sample
+{
+    int j, k;
+    int a;
+    INIT_CLIP
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1); // byte stride -> pixel stride
+    const pixel * const src0 = src +3-stride; // centre of the top edge
+    const pixel * src1 = src +4*stride-1; // walks down the left edge
+    const pixel * src2 = src1-2*stride;   // == src+2*stride-1; walks up the left edge
+    int H = src0[1] - src0[-1]; // horizontal gradient
+    int V = src1[0] - src2[ 0]; // vertical gradient
+    for(k=2; k<=4; ++k) {
+        src1 += stride; src2 -= stride;
+        H += k*(src0[k] - src0[-k]); // outer samples weighted more heavily
+        V += k*(src1[0] - src2[ 0]);
+    }
+    H = ( 17*H+16 ) >> 5;
+    V = ( 17*V+16 ) >> 5;
+
+    a = 16*(src1[0] + src2[8]+1) - 3*(V+H); // base value from the far corners
+    for(j=8; j>0; --j) {
+        int b = a;
+        a += V; // step one row down the vertical gradient
+        src[0] = CLIP((b    ) >> 5);
+        src[1] = CLIP((b+  H) >> 5);
+        src[2] = CLIP((b+2*H) >> 5);
+        src[3] = CLIP((b+3*H) >> 5);
+        src[4] = CLIP((b+4*H) >> 5);
+        src[5] = CLIP((b+5*H) >> 5);
+        src[6] = CLIP((b+6*H) >> 5);
+        src[7] = CLIP((b+7*H) >> 5);
+        src += stride;
+    }
+}
+
+static void FUNCC(pred8x16_plane)(uint8_t *_src, ptrdiff_t _stride) // 8x16 plane (4:2:2): 8-wide horizontal fit, 16-tall vertical fit
+{
+    int j, k;
+    int a;
+    INIT_CLIP
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1); // byte stride -> pixel stride
+    const pixel * const src0 = src +3-stride; // centre of the top edge
+    const pixel * src1 = src +8*stride-1; // walks down the left edge
+    const pixel * src2 = src1-2*stride;   // == src+6*stride-1; walks up the left edge
+    int H = src0[1] - src0[-1];
+    int V = src1[0] - src2[ 0];
+
+    for (k = 2; k <= 4; ++k) { // both gradients over the first 4 weight steps
+        src1 += stride; src2 -= stride;
+        H += k*(src0[k] - src0[-k]);
+        V += k*(src1[0] - src2[ 0]);
+    }
+    for (; k <= 8; ++k) { // vertical edge is twice as long: continue V only
+        src1 += stride; src2 -= stride;
+        V += k*(src1[0] - src2[0]);
+    }
+
+    H = (17*H+16) >> 5; // 8-wide scaling
+    V = (5*V+32) >> 6;  // 16-tall scaling
+
+    a = 16*(src1[0] + src2[8] + 1) - 7*V - 3*H;
+    for(j=16; j>0; --j) {
+        int b = a;
+        a += V;
+        src[0] = CLIP((b    ) >> 5);
+        src[1] = CLIP((b+  H) >> 5);
+        src[2] = CLIP((b+2*H) >> 5);
+        src[3] = CLIP((b+3*H) >> 5);
+        src[4] = CLIP((b+4*H) >> 5);
+        src[5] = CLIP((b+5*H) >> 5);
+        src[6] = CLIP((b+6*H) >> 5);
+        src[7] = CLIP((b+7*H) >> 5);
+        src += stride;
+    }
+}
+
+#define SRC(x,y) src[(x)+(y)*stride]
+#define PL(y) \
+ const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
+#define PREDICT_8x8_LOAD_LEFT \
+ const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
+ + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
+ PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
+ const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
+
+#define PT(x) \
+ const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
+#define PREDICT_8x8_LOAD_TOP \
+ const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
+ + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
+ PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
+ const int t7 av_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
+ + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
+
+#define PTR(x) \
+ t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
+#define PREDICT_8x8_LOAD_TOPRIGHT \
+ int t8, t9, t10, t11, t12, t13, t14, t15; \
+ if(has_topright) { \
+ PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
+ t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
+ } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
+
+#define PREDICT_8x8_LOAD_TOPLEFT \
+ const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
+
+#define PREDICT_8x8_DC(v) \
+ int y; \
+ for( y = 0; y < 8; y++ ) { \
+ AV_WN4PA(((pixel4*)src)+0, v); \
+ AV_WN4PA(((pixel4*)src)+1, v); \
+ src += stride; \
+ }
+
+static void FUNCC(pred8x8l_128_dc)(uint8_t *_src, int has_topleft, // 8x8 luma DC with no edges: fill with mid-grey
+                                   int has_topright, ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1); // byte stride -> pixel stride
+
+    PREDICT_8x8_DC(PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1))); // 128 at 8-bit
+}
+static void FUNCC(pred8x8l_left_dc)(uint8_t *_src, int has_topleft, // 8x8 luma DC from the low-pass-filtered left edge only
+                                    int has_topright, ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1); // byte stride -> pixel stride
+
+    PREDICT_8x8_LOAD_LEFT; // defines filtered l0..l7; uses has_topleft for l0
+    const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3);
+    PREDICT_8x8_DC(dc);
+}
+static void FUNCC(pred8x8l_top_dc)(uint8_t *_src, int has_topleft, // 8x8 luma DC from the low-pass-filtered top edge only
+                                   int has_topright, ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1); // byte stride -> pixel stride
+
+    PREDICT_8x8_LOAD_TOP; // defines filtered t0..t7; uses has_topleft/has_topright at the ends
+    const pixel4 dc = PIXEL_SPLAT_X4((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3);
+    PREDICT_8x8_DC(dc);
+}
+static void FUNCC(pred8x8l_dc)(uint8_t *_src, int has_topleft, // 8x8 luma DC: rounded mean of filtered left and top edges (16 samples)
+                               int has_topright, ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1); // byte stride -> pixel stride
+
+    PREDICT_8x8_LOAD_LEFT;
+    PREDICT_8x8_LOAD_TOP;
+    const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7
+                                      +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4);
+    PREDICT_8x8_DC(dc);
+}
+static void FUNCC(pred8x8l_horizontal)(uint8_t *_src, int has_topleft, // 8x8 luma horizontal: splat each filtered left sample across its row
+                                       int has_topright, ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1); // byte stride -> pixel stride
+    pixel4 a;
+
+    PREDICT_8x8_LOAD_LEFT;
+#define ROW(y) a = PIXEL_SPLAT_X4(l##y); /* fill row y with filtered left sample l##y */ \
+               AV_WN4PA(src+y*stride, a); \
+               AV_WN4PA(src+y*stride+4, a);
+    ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
+#undef ROW
+}
+static void FUNCC(pred8x8l_vertical)(uint8_t *_src, int has_topleft, // 8x8 luma vertical: write filtered top row, then copy it into rows 1-7
+                                     int has_topright, ptrdiff_t _stride)
+{
+    int y;
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1); // byte stride -> pixel stride
+    pixel4 a, b;
+
+    PREDICT_8x8_LOAD_TOP;
+    src[0] = t0; // row 0 holds the filtered top edge
+    src[1] = t1;
+    src[2] = t2;
+    src[3] = t3;
+    src[4] = t4;
+    src[5] = t5;
+    src[6] = t6;
+    src[7] = t7;
+    a = AV_RN4PA(((pixel4*)src)+0); // re-read row 0 as two 4-pixel groups for fast replication
+    b = AV_RN4PA(((pixel4*)src)+1);
+    for( y = 1; y < 8; y++ ) {
+        AV_WN4PA(((pixel4*)(src+y*stride))+0, a);
+        AV_WN4PA(((pixel4*)(src+y*stride))+1, b);
+    }
+}
+static void FUNCC(pred8x8l_down_left)(uint8_t *_src, int has_topleft, // 8x8 luma diagonal-down-left from filtered top + top-right samples
+                                      int has_topright, ptrdiff_t _stride)
+{
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1); // byte stride -> pixel stride
+    PREDICT_8x8_LOAD_TOP;
+    PREDICT_8x8_LOAD_TOPRIGHT; // t8..t15; replicated from t7 when top-right is unavailable
+    SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
+    SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
+    SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
+    SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
+    SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
+    SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
+    SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
+    SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
+    SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
+    SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
+    SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
+    SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
+    SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
+    SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
+    SRC(7,7)= (t14 + 3*t15 + 2) >> 2; // t15 replicated past the edge
+}
+static void FUNCC(pred8x8l_down_right)(uint8_t *_src, int has_topleft,
+                                       int has_topright, ptrdiff_t _stride)
+{ /* 8x8 intra, down-right mode: diagonals filled with (1,2,1)-filtered left (l*), top-left (lt) and top (t*) neighbours */
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1); /* byte stride -> pixel stride */
+    PREDICT_8x8_LOAD_TOP;
+    PREDICT_8x8_LOAD_LEFT;
+    PREDICT_8x8_LOAD_TOPLEFT;
+    SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
+    SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
+    SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
+    SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
+    SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
+    SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
+    SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
+    SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2; /* main diagonal pivots on the top-left corner */
+    SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
+    SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
+    SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
+    SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
+    SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
+    SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
+    SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
+}
+static void FUNCC(pred8x8l_vertical_right)(uint8_t *_src, int has_topleft,
+                                           int has_topright, ptrdiff_t _stride)
+{ /* 8x8 intra, vertical-right mode: alternates 2-tap ((a+b+1)>>1) and 3-tap ((a+2b+c+2)>>2) filtered diagonals */
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1); /* byte stride -> pixel stride */
+    PREDICT_8x8_LOAD_TOP;
+    PREDICT_8x8_LOAD_LEFT;
+    PREDICT_8x8_LOAD_TOPLEFT;
+    SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
+    SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
+    SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
+    SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
+    SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
+    SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
+    SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
+    SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
+    SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
+    SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
+    SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
+    SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
+    SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
+    SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
+    SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
+    SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
+    SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
+    SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
+    SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
+    SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
+    SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
+    SRC(7,0)= (t6 + t7 + 1) >> 1;
+}
+static void FUNCC(pred8x8l_horizontal_down)(uint8_t *_src, int has_topleft,
+                                            int has_topright, ptrdiff_t _stride)
+{ /* 8x8 intra, horizontal-down mode: left/top-left dominated diagonals, alternating 2-tap and 3-tap filters */
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1); /* byte stride -> pixel stride */
+    PREDICT_8x8_LOAD_TOP;
+    PREDICT_8x8_LOAD_LEFT;
+    PREDICT_8x8_LOAD_TOPLEFT;
+    SRC(0,7)= (l6 + l7 + 1) >> 1;
+    SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
+    SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
+    SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
+    SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
+    SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
+    SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
+    SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
+    SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
+    SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
+    SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
+    SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
+    SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
+    SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
+    SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
+    SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
+    SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2; /* from here on only top neighbours contribute */
+    SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
+    SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
+    SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
+    SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
+    SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
+}
+static void FUNCC(pred8x8l_vertical_left)(uint8_t *_src, int has_topleft,
+                                          int has_topright, ptrdiff_t _stride)
+{ /* 8x8 intra, vertical-left mode: built from top + top-right samples t0..t12, even rows 2-tap, odd rows 3-tap */
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1); /* byte stride -> pixel stride */
+    PREDICT_8x8_LOAD_TOP;
+    PREDICT_8x8_LOAD_TOPRIGHT;
+    SRC(0,0)= (t0 + t1 + 1) >> 1;
+    SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
+    SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
+    SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
+    SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
+    SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
+    SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
+    SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
+    SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
+    SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
+    SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
+    SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
+    SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
+    SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
+    SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
+    SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
+    SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
+    SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
+    SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
+    SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
+    SRC(7,6)= (t10 + t11 + 1) >> 1;
+    SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
+}
+static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft,
+                                          int has_topright, ptrdiff_t _stride)
+{ /* 8x8 intra, horizontal-up mode: interpolates upward along the left column l0..l7 only */
+    pixel *src = (pixel*)_src;
+    int stride = _stride>>(sizeof(pixel)-1); /* byte stride -> pixel stride */
+    PREDICT_8x8_LOAD_LEFT;
+    SRC(0,0)= (l0 + l1 + 1) >> 1;
+    SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
+    SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
+    SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
+    SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
+    SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
+    SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
+    SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
+    SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
+    SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
+    SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
+    SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
+    SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
+    SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
+    SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
+    SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
+    SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
+    SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7; /* bottom-right triangle saturates to the last left sample */
+}
+
+static void FUNCC(pred8x8l_vertical_filter_add)(uint8_t *_src, int16_t *_block, int has_topleft,
+                                      int has_topright, ptrdiff_t _stride)
+{ /* fused vertical prediction + residual add: each column starts from the filtered top sample and accumulates its coefficients downwards */
+    int i;
+    pixel *src = (pixel*)_src;
+    const dctcoef *block = (const dctcoef*)_block;
+    pixel pix[8];
+    int stride = _stride>>(sizeof(pixel)-1); /* byte stride -> pixel stride */
+    PREDICT_8x8_LOAD_TOP;
+
+    pix[0] = t0;
+    pix[1] = t1;
+    pix[2] = t2;
+    pix[3] = t3;
+    pix[4] = t4;
+    pix[5] = t5;
+    pix[6] = t6;
+    pix[7] = t7;
+
+    for(i=0; i<8; i++){
+        pixel v = pix[i];
+        src[0*stride]= v += block[0];
+        src[1*stride]= v += block[8]; /* coefficients are laid out 8 per row */
+        src[2*stride]= v += block[16];
+        src[3*stride]= v += block[24];
+        src[4*stride]= v += block[32];
+        src[5*stride]= v += block[40];
+        src[6*stride]= v += block[48];
+        src[7*stride]= v + block[56]; /* last row: no further accumulation needed */
+        src++;
+        block++;
+    }
+
+    memset(_block, 0, sizeof(dctcoef) * 64); /* residual is consumed: clear it for the next block */
+}
+
+static void FUNCC(pred8x8l_horizontal_filter_add)(uint8_t *_src, int16_t *_block, int has_topleft,
+                                      int has_topright, ptrdiff_t _stride)
+{ /* fused horizontal prediction + residual add: each row starts from the filtered left sample and accumulates its coefficients rightwards */
+    int i;
+    pixel *src = (pixel*)_src;
+    const dctcoef *block = (const dctcoef*)_block;
+    pixel pix[8];
+    int stride = _stride>>(sizeof(pixel)-1); /* byte stride -> pixel stride */
+    PREDICT_8x8_LOAD_LEFT;
+
+    pix[0] = l0;
+    pix[1] = l1;
+    pix[2] = l2;
+    pix[3] = l3;
+    pix[4] = l4;
+    pix[5] = l5;
+    pix[6] = l6;
+    pix[7] = l7;
+
+    for(i=0; i<8; i++){
+        pixel v = pix[i];
+        src[0]= v += block[0];
+        src[1]= v += block[1];
+        src[2]= v += block[2];
+        src[3]= v += block[3];
+        src[4]= v += block[4];
+        src[5]= v += block[5];
+        src[6]= v += block[6];
+        src[7]= v + block[7]; /* last column: no further accumulation needed */
+        src+= stride;
+        block+= 8;
+    }
+
+    memset(_block, 0, sizeof(dctcoef) * 64); /* residual is consumed: clear it for the next block */
+}
+
+#undef PREDICT_8x8_LOAD_LEFT
+#undef PREDICT_8x8_LOAD_TOP
+#undef PREDICT_8x8_LOAD_TOPLEFT
+#undef PREDICT_8x8_LOAD_TOPRIGHT
+#undef PREDICT_8x8_DC
+#undef PTR
+#undef PT
+#undef PL
+#undef SRC
+
+static void FUNCC(pred4x4_vertical_add)(uint8_t *_pix, int16_t *_block,
+                                        ptrdiff_t stride)
+{ /* add a 4x4 residual to a vertical prediction taken from the row directly above the block */
+    int i;
+    pixel *pix = (pixel*)_pix;
+    const dctcoef *block = (const dctcoef*)_block;
+    stride >>= sizeof(pixel)-1; /* byte stride -> pixel stride */
+    pix -= stride; /* step up to the neighbour row that provides the predictor */
+    for(i=0; i<4; i++){
+        pixel v = pix[0];
+        pix[1*stride]= v += block[0];
+        pix[2*stride]= v += block[4]; /* coefficients are laid out 4 per row */
+        pix[3*stride]= v += block[8];
+        pix[4*stride]= v + block[12];
+        pix++;
+        block++;
+    }
+
+    memset(_block, 0, sizeof(dctcoef) * 16); /* residual consumed: clear for the next block */
+}
+
+static void FUNCC(pred4x4_horizontal_add)(uint8_t *_pix, int16_t *_block,
+                                          ptrdiff_t stride)
+{ /* add a 4x4 residual to a horizontal prediction taken from the column directly left of the block */
+    int i;
+    pixel *pix = (pixel*)_pix;
+    const dctcoef *block = (const dctcoef*)_block;
+    stride >>= sizeof(pixel)-1; /* byte stride -> pixel stride */
+    for(i=0; i<4; i++){
+        pixel v = pix[-1]; /* left-neighbour pixel is the row's predictor */
+        pix[0]= v += block[0];
+        pix[1]= v += block[1];
+        pix[2]= v += block[2];
+        pix[3]= v + block[3];
+        pix+= stride;
+        block+= 4;
+    }
+
+    memset(_block, 0, sizeof(dctcoef) * 16); /* residual consumed: clear for the next block */
+}
+
+static void FUNCC(pred8x8l_vertical_add)(uint8_t *_pix, int16_t *_block,
+                                         ptrdiff_t stride)
+{ /* 8x8 variant of pred4x4_vertical_add: predictor row is the one above, residual is 8 coefficients per row */
+    int i;
+    pixel *pix = (pixel*)_pix;
+    const dctcoef *block = (const dctcoef*)_block;
+    stride >>= sizeof(pixel)-1; /* byte stride -> pixel stride */
+    pix -= stride; /* step up to the neighbour row that provides the predictor */
+    for(i=0; i<8; i++){
+        pixel v = pix[0];
+        pix[1*stride]= v += block[0];
+        pix[2*stride]= v += block[8];
+        pix[3*stride]= v += block[16];
+        pix[4*stride]= v += block[24];
+        pix[5*stride]= v += block[32];
+        pix[6*stride]= v += block[40];
+        pix[7*stride]= v += block[48];
+        pix[8*stride]= v + block[56];
+        pix++;
+        block++;
+    }
+
+    memset(_block, 0, sizeof(dctcoef) * 64); /* residual consumed: clear for the next block */
+}
+
+static void FUNCC(pred8x8l_horizontal_add)(uint8_t *_pix, int16_t *_block,
+                                           ptrdiff_t stride)
+{ /* 8x8 variant of pred4x4_horizontal_add: left-neighbour pixel predicts each row */
+    int i;
+    pixel *pix = (pixel*)_pix;
+    const dctcoef *block = (const dctcoef*)_block;
+    stride >>= sizeof(pixel)-1; /* byte stride -> pixel stride */
+    for(i=0; i<8; i++){
+        pixel v = pix[-1]; /* left-neighbour pixel is the row's predictor */
+        pix[0]= v += block[0];
+        pix[1]= v += block[1];
+        pix[2]= v += block[2];
+        pix[3]= v += block[3];
+        pix[4]= v += block[4];
+        pix[5]= v += block[5];
+        pix[6]= v += block[6];
+        pix[7]= v + block[7];
+        pix+= stride;
+        block+= 8;
+    }
+
+    memset(_block, 0, sizeof(dctcoef) * 64); /* residual consumed: clear for the next block */
+}
+
+static void FUNCC(pred16x16_vertical_add)(uint8_t *pix, const int *block_offset,
+                                          int16_t *block,
+                                          ptrdiff_t stride)
+{ /* apply the 4x4 vertical prediction+add to all 16 sub-blocks of a 16x16 macroblock */
+    int i;
+    for(i=0; i<16; i++)
+        FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); /* int16_t* step 16*sizeof(pixel): coeff spacing scales with bit depth */
+}
+
+static void FUNCC(pred16x16_horizontal_add)(uint8_t *pix,
+                                            const int *block_offset,
+                                            int16_t *block,
+                                            ptrdiff_t stride)
+{ /* apply the 4x4 horizontal prediction+add to all 16 sub-blocks of a 16x16 macroblock */
+    int i;
+    for(i=0; i<16; i++)
+        FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride); /* int16_t* step 16*sizeof(pixel): coeff spacing scales with bit depth */
+}
+
+static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset,
+                                        int16_t *block, ptrdiff_t stride)
+{ /* apply the 4x4 vertical prediction+add to the 4 sub-blocks of an 8x8 block */
+    int i;
+    for(i=0; i<4; i++)
+        FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
+}
+
+static void FUNCC(pred8x16_vertical_add)(uint8_t *pix, const int *block_offset,
+                                         int16_t *block, ptrdiff_t stride)
+{ /* 8x16 chroma: top 8x8 half uses block_offset[0..3], bottom half reads block_offset[8..11] */
+    int i;
+    for(i=0; i<4; i++)
+        FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
+    for(i=4; i<8; i++)
+        FUNCC(pred4x4_vertical_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride); /* i+4 -> offsets 8..11; presumably a second offset bank -- TODO confirm against caller */
+}
+
+static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset,
+                                          int16_t *block,
+                                          ptrdiff_t stride)
+{ /* apply the 4x4 horizontal prediction+add to the 4 sub-blocks of an 8x8 block */
+    int i;
+    for(i=0; i<4; i++)
+        FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
+}
+
+static void FUNCC(pred8x16_horizontal_add)(uint8_t *pix,
+                                           const int *block_offset,
+                                           int16_t *block, ptrdiff_t stride)
+{ /* 8x16 chroma: top 8x8 half uses block_offset[0..3], bottom half reads block_offset[8..11] */
+    int i;
+    for(i=0; i<4; i++)
+        FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
+    for(i=4; i<8; i++)
+        FUNCC(pred4x4_horizontal_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride); /* i+4 -> offsets 8..11; presumably a second offset bank -- TODO confirm against caller */
+}
diff --git a/media/ffvpx/libavcodec/hpeldsp.h b/media/ffvpx/libavcodec/hpeldsp.h
new file mode 100644
index 000000000..1a3cea54b
--- /dev/null
+++ b/media/ffvpx/libavcodec/hpeldsp.h
@@ -0,0 +1,104 @@
+/*
+ * Half-pel DSP functions.
+ * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Half-pel DSP functions.
+ */
+
+#ifndef AVCODEC_HPELDSP_H
+#define AVCODEC_HPELDSP_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+/* add and put pixel (decoding) */
+// blocksizes for hpel_pixels_func are 8x4,8x8 16x8 16x16
+// h for hpel_pixels_func is limited to {width/2, width} but never larger
+// than 16 and never smaller than 4
+typedef void (*op_pixels_func)(uint8_t *block /*align width (8 or 16)*/,
+                               const uint8_t *pixels /*align 1*/,
+                               ptrdiff_t line_size, int h); /* line_size is in bytes (see table docs below) */
+
+/**
+ * Half-pel DSP context.
+ */
+typedef struct HpelDSPContext {
+    /**
+     * Halfpel motion compensation with rounding (a+b+1)>>1.
+     * this is an array[4][4] of motion compensation functions for 4
+     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
+     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
+     * @param block destination where the result is stored
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     */
+    op_pixels_func put_pixels_tab[4][4]; /* NOTE(review): table has 4 size rows but the doc lists only 16/8 -- confirm rows 2,3 mapping */
+
+    /**
+     * Halfpel motion compensation with rounding (a+b+1)>>1.
+     * This is an array[4][4] of motion compensation functions for 4
+     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
+     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
+     * @param block destination into which the result is averaged (a+b+1)>>1
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     */
+    op_pixels_func avg_pixels_tab[4][4]; /* NOTE(review): same 4-row table as above -- confirm rows 2,3 mapping */
+
+    /**
+     * Halfpel motion compensation with no rounding (a+b)>>1.
+     * this is an array[4][4] of motion compensation functions for 2
+     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
+     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
+     * @param block destination where the result is stored
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     */
+    op_pixels_func put_no_rnd_pixels_tab[4][4];
+
+    /**
+     * Halfpel motion compensation with no rounding (a+b)>>1.
+     * this is an array[4] of motion compensation functions for 1
+     * horizontal blocksize (16) and the 4 halfpel positions<br>
+     * *pixels_tab[0][ xhalfpel + 2*yhalfpel ]
+     * @param block destination into which the result is averaged (a+b)>>1
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     */
+    op_pixels_func avg_no_rnd_pixels_tab[4];
+} HpelDSPContext;
+
+void ff_hpeldsp_init(HpelDSPContext *c, int flags);
+
+void ff_hpeldsp_init_aarch64(HpelDSPContext *c, int flags);
+void ff_hpeldsp_init_alpha(HpelDSPContext *c, int flags);
+void ff_hpeldsp_init_arm(HpelDSPContext *c, int flags);
+void ff_hpeldsp_init_ppc(HpelDSPContext *c, int flags);
+void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags);
+void ff_hpeldsp_init_mips(HpelDSPContext *c, int flags);
+
+#endif /* AVCODEC_HPELDSP_H */
diff --git a/media/ffvpx/libavcodec/idctdsp.h b/media/ffvpx/libavcodec/idctdsp.h
new file mode 100644
index 000000000..b180a6762
--- /dev/null
+++ b/media/ffvpx/libavcodec/idctdsp.h
@@ -0,0 +1,114 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_IDCTDSP_H
+#define AVCODEC_IDCTDSP_H
+
+#include <stdint.h>
+
+#include "avcodec.h"
+
+/**
+ * Scantable.
+ */
+typedef struct ScanTable {
+    const uint8_t *scantable;   /* source scan order (e.g. zigzag) this table was built from */
+    uint8_t permutated[64];     /* presumably scantable combined with the IDCT permutation -- see ff_init_scantable, TODO confirm */
+    uint8_t raster_end[64];     /* NOTE(review): semantics not visible here -- confirm in ff_init_scantable */
+} ScanTable;
+
+enum idct_permutation_type {  /* coefficient orderings expected by the various optimized IDCT implementations */
+    FF_IDCT_PERM_NONE,        /* natural order: reference IDCT */
+    FF_IDCT_PERM_LIBMPEG2,
+    FF_IDCT_PERM_SIMPLE,
+    FF_IDCT_PERM_TRANSPOSE,
+    FF_IDCT_PERM_PARTTRANS,
+    FF_IDCT_PERM_SSE2,
+};
+
+void ff_init_scantable(uint8_t *permutation, ScanTable *st,
+ const uint8_t *src_scantable);
+void ff_init_scantable_permutation(uint8_t *idct_permutation,
+ enum idct_permutation_type perm_type);
+int ff_init_scantable_permutation_x86(uint8_t *idct_permutation,
+ enum idct_permutation_type perm_type);
+
+typedef struct IDCTDSPContext {
+    /* pixel ops : interface with DCT */
+    void (*put_pixels_clamped)(const int16_t *block /* align 16 */,
+                               uint8_t *pixels /* align 8 */,
+                               ptrdiff_t line_size);
+    void (*put_signed_pixels_clamped)(const int16_t *block /* align 16 */,
+                                      uint8_t *pixels /* align 8 */,
+                                      ptrdiff_t line_size);
+    void (*add_pixels_clamped)(const int16_t *block /* align 16 */,
+                               uint8_t *pixels /* align 8 */,
+                               ptrdiff_t line_size);
+
+    void (*idct)(int16_t *block /* align 16 */); /* bare in-place transform: no pixel destination, unlike idct_put/idct_add below */
+
+    /**
+     * block -> idct -> clip to unsigned 8 bit -> dest.
+     * (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...)
+     * @param line_size size in bytes of a horizontal line of dest
+     */
+    void (*idct_put)(uint8_t *dest /* align 8 */,
+                     int line_size, int16_t *block /* align 16 */);
+
+    /**
+     * block -> idct -> add dest -> clip to unsigned 8 bit -> dest.
+     * @param line_size size in bytes of a horizontal line of dest
+     */
+    void (*idct_add)(uint8_t *dest /* align 8 */,
+                     int line_size, int16_t *block /* align 16 */);
+
+    /**
+     * IDCT input permutation.
+     * Several optimized IDCTs need a permutated input (relative to the
+     * normal order of the reference IDCT).
+     * This permutation must be performed before the idct_put/add.
+     * Note, normally this can be merged with the zigzag/alternate scan<br>
+     * An example to avoid confusion:
+     * - (->decode coeffs -> zigzag reorder -> dequant -> reference IDCT -> ...)
+     * - (x -> reference DCT -> reference IDCT -> x)
+     * - (x -> reference DCT -> simple_mmx_perm = idct_permutation
+     *    -> simple_idct_mmx -> x)
+     * - (-> decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant
+     *    -> simple_idct_mmx -> ...)
+     */
+    uint8_t idct_permutation[64];
+    enum idct_permutation_type perm_type; /* which ordering idct_permutation above encodes */
+} IDCTDSPContext;
+
+extern void (*ff_put_pixels_clamped)(const int16_t *block, uint8_t *pixels, ptrdiff_t line_size);
+extern void (*ff_add_pixels_clamped)(const int16_t *block, uint8_t *pixels, ptrdiff_t line_size);
+
+void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx);
+
+void ff_idctdsp_init_alpha(IDCTDSPContext *c, AVCodecContext *avctx,
+ unsigned high_bit_depth);
+void ff_idctdsp_init_arm(IDCTDSPContext *c, AVCodecContext *avctx,
+ unsigned high_bit_depth);
+void ff_idctdsp_init_ppc(IDCTDSPContext *c, AVCodecContext *avctx,
+ unsigned high_bit_depth);
+void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
+ unsigned high_bit_depth);
+void ff_idctdsp_init_mips(IDCTDSPContext *c, AVCodecContext *avctx,
+ unsigned high_bit_depth);
+
+#endif /* AVCODEC_IDCTDSP_H */
diff --git a/media/ffvpx/libavcodec/imgconvert.c b/media/ffvpx/libavcodec/imgconvert.c
new file mode 100644
index 000000000..46fa7809b
--- /dev/null
+++ b/media/ffvpx/libavcodec/imgconvert.c
@@ -0,0 +1,235 @@
+/*
+ * Misc image conversion routines
+ * Copyright (c) 2001, 2002, 2003 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * misc image conversion routines
+ */
+
+#include "avcodec.h"
+#include "internal.h"
+#include "mathops.h"
+#include "libavutil/avassert.h"
+#include "libavutil/colorspace.h"
+#include "libavutil/common.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/internal.h"
+#include "libavutil/imgutils.h"
+
+#if FF_API_GETCHROMA
+void avcodec_get_chroma_sub_sample(enum AVPixelFormat pix_fmt, int *h_shift, int *v_shift)
+{ /* deprecated public wrapper: report the chroma subsampling shifts straight from the pixfmt descriptor */
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
+    av_assert0(desc); /* aborts on an unknown pix_fmt instead of returning an error */
+    *h_shift = desc->log2_chroma_w;
+    *v_shift = desc->log2_chroma_h;
+}
+#endif
+
+int avcodec_get_pix_fmt_loss(enum AVPixelFormat dst_pix_fmt,
+                             enum AVPixelFormat src_pix_fmt,
+                             int has_alpha)
+{ /* thin compatibility wrapper around the libavutil implementation */
+    return av_get_pix_fmt_loss(dst_pix_fmt, src_pix_fmt, has_alpha);
+}
+
+enum AVPixelFormat avcodec_find_best_pix_fmt_of_2(enum AVPixelFormat dst_pix_fmt1, enum AVPixelFormat dst_pix_fmt2,
+                                            enum AVPixelFormat src_pix_fmt, int has_alpha, int *loss_ptr)
+{ /* thin compatibility wrapper around the libavutil implementation */
+    return av_find_best_pix_fmt_of_2(dst_pix_fmt1, dst_pix_fmt2, src_pix_fmt, has_alpha, loss_ptr);
+}
+
+#if AV_HAVE_INCOMPATIBLE_LIBAV_ABI
+/* under the Libav-compatible ABI this symbol takes a NONE-terminated list ... */
+enum AVPixelFormat avcodec_find_best_pix_fmt2(const enum AVPixelFormat *pix_fmt_list,
+                                              enum AVPixelFormat src_pix_fmt,
+                                              int has_alpha, int *loss_ptr){
+    return avcodec_find_best_pix_fmt_of_list(pix_fmt_list, src_pix_fmt, has_alpha, loss_ptr);
+}
+#else
+/* ... while under the FFmpeg ABI it takes a pair of candidate formats */
+enum AVPixelFormat avcodec_find_best_pix_fmt2(enum AVPixelFormat dst_pix_fmt1, enum AVPixelFormat dst_pix_fmt2,
+                                              enum AVPixelFormat src_pix_fmt, int has_alpha, int *loss_ptr)
+{
+    return avcodec_find_best_pix_fmt_of_2(dst_pix_fmt1, dst_pix_fmt2, src_pix_fmt, has_alpha, loss_ptr);
+}
+#endif
+
+enum AVPixelFormat avcodec_find_best_pix_fmt_of_list(const enum AVPixelFormat *pix_fmt_list,
+                                            enum AVPixelFormat src_pix_fmt,
+                                            int has_alpha, int *loss_ptr){
+    /* fold the NONE-terminated list through pairwise comparison, seeded with NONE */
+    int i;
+
+    enum AVPixelFormat best = AV_PIX_FMT_NONE;
+
+    for(i=0; pix_fmt_list[i] != AV_PIX_FMT_NONE; i++)
+        best = avcodec_find_best_pix_fmt_of_2(best, pix_fmt_list[i], src_pix_fmt, has_alpha, loss_ptr);
+
+    return best; /* AV_PIX_FMT_NONE when the list is empty */
+}
+
+#if FF_API_AVPICTURE
+FF_DISABLE_DEPRECATION_WARNINGS
+/* return true if yuv planar */
+static inline int is_yuv_planar(const AVPixFmtDescriptor *desc)
+{
+ int i;
+ int planes[4] = { 0 };
+
+ if ( desc->flags & AV_PIX_FMT_FLAG_RGB
+ || !(desc->flags & AV_PIX_FMT_FLAG_PLANAR))
+ return 0;
+
+ /* set the used planes */
+ for (i = 0; i < desc->nb_components; i++)
+ planes[desc->comp[i].plane] = 1;
+
+ /* if there is an unused plane, the format is not planar */
+ for (i = 0; i < desc->nb_components; i++)
+ if (!planes[i])
+ return 0;
+ return 1;
+}
+
+int av_picture_crop(AVPicture *dst, const AVPicture *src,
+                    enum AVPixelFormat pix_fmt, int top_band, int left_band)
+{ /* deprecated: make dst a cropped *view* into src's buffers (pointer arithmetic only, no pixel copy) */
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
+    int y_shift;
+    int x_shift;
+    int max_step[4];
+
+    if (pix_fmt < 0 || pix_fmt >= AV_PIX_FMT_NB)
+        return -1; /* range check happens before desc is dereferenced */
+
+    y_shift = desc->log2_chroma_h;
+    x_shift = desc->log2_chroma_w;
+    av_image_fill_max_pixsteps(max_step, NULL, desc);
+
+    if (is_yuv_planar(desc)) {
+        dst->data[0] = src->data[0] + (top_band * src->linesize[0]) + left_band; /* NOTE(review): plane 0 offset assumes 1 byte/pixel -- check for >8-bit planar formats */
+        dst->data[1] = src->data[1] + ((top_band >> y_shift) * src->linesize[1]) + (left_band >> x_shift);
+        dst->data[2] = src->data[2] + ((top_band >> y_shift) * src->linesize[2]) + (left_band >> x_shift);
+    } else{
+        if(top_band % (1<<y_shift) || left_band % (1<<x_shift))
+            return -1; /* packed formats: crop must be aligned to the chroma subsampling grid */
+        dst->data[0] = src->data[0] + (top_band * src->linesize[0]) + (left_band * max_step[0]);
+    }
+
+    dst->linesize[0] = src->linesize[0];
+    dst->linesize[1] = src->linesize[1];
+    dst->linesize[2] = src->linesize[2];
+    return 0;
+}
+
+int av_picture_pad(AVPicture *dst, const AVPicture *src, int height, int width,
+                   enum AVPixelFormat pix_fmt, int padtop, int padbottom, int padleft, int padright,
+                   int *color)
+{ /* deprecated: fill pad borders of dst with color[] and, for planar YUV, optionally copy src into the interior */
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
+    uint8_t *optr;
+    int y_shift;
+    int x_shift;
+    int yheight;
+    int i, y;
+    int max_step[4];
+
+    if (pix_fmt < 0 || pix_fmt >= AV_PIX_FMT_NB)
+        return -1;
+
+    if (!is_yuv_planar(desc)) { /* packed formats: only pure padding is supported */
+        if (src)
+            return -1; //TODO: Not yet implemented
+
+        av_image_fill_max_pixsteps(max_step, NULL, desc);
+
+        if (padtop || padleft) { /* top band plus the left edge of the first interior row, in one memset */
+            memset(dst->data[0], color[0],
+                   dst->linesize[0] * padtop + (padleft * max_step[0]));
+        }
+
+        if (padleft || padright) { /* right edge of row y joined with left edge of row y+1 */
+            optr = dst->data[0] + dst->linesize[0] * padtop +
+                   (dst->linesize[0] - (padright * max_step[0]));
+            yheight = height - 1 - (padtop + padbottom);
+            for (y = 0; y < yheight; y++) {
+                memset(optr, color[0], (padleft + padright) * max_step[0]);
+                optr += dst->linesize[0];
+            }
+        }
+
+        if (padbottom || padright) { /* right edge of the last interior row plus the whole bottom band */
+            optr = dst->data[0] + dst->linesize[0] * (height - padbottom) -
+                   (padright * max_step[0]);
+            memset(optr, color[0], dst->linesize[0] * padbottom +
+                   (padright * max_step[0]));
+        }
+
+        return 0;
+    }
+
+    for (i = 0; i < 3; i++) { /* planar YUV: pad each plane; chroma dimensions scaled by the subsampling shifts */
+        x_shift = i ? desc->log2_chroma_w : 0;
+        y_shift = i ? desc->log2_chroma_h : 0;
+
+        if (padtop || padleft) {
+            memset(dst->data[i], color[i],
+                   dst->linesize[i] * (padtop >> y_shift) + (padleft >> x_shift));
+        }
+
+        if (padleft || padright) {
+            optr = dst->data[i] + dst->linesize[i] * (padtop >> y_shift) +
+                   (dst->linesize[i] - (padright >> x_shift));
+            yheight = (height - 1 - (padtop + padbottom)) >> y_shift;
+            for (y = 0; y < yheight; y++) {
+                memset(optr, color[i], (padleft + padright) >> x_shift);
+                optr += dst->linesize[i];
+            }
+        }
+
+        if (src) { /* first line */
+            uint8_t *iptr = src->data[i];
+            optr = dst->data[i] + dst->linesize[i] * (padtop >> y_shift) +
+                   (padleft >> x_shift);
+            memcpy(optr, iptr, (width - padleft - padright) >> x_shift);
+            iptr += src->linesize[i];
+            optr = dst->data[i] + dst->linesize[i] * (padtop >> y_shift) +
+                   (dst->linesize[i] - (padright >> x_shift));
+            yheight = (height - 1 - (padtop + padbottom)) >> y_shift;
+            for (y = 0; y < yheight; y++) { /* remaining lines: pad the seam, then copy the interior row */
+                memset(optr, color[i], (padleft + padright) >> x_shift);
+                memcpy(optr + ((padleft + padright) >> x_shift), iptr,
+                       (width - padleft - padright) >> x_shift);
+                iptr += src->linesize[i];
+                optr += dst->linesize[i];
+            }
+        }
+
+        if (padbottom || padright) {
+            optr = dst->data[i] + dst->linesize[i] *
+                   ((height - padbottom) >> y_shift) - (padright >> x_shift);
+            memset(optr, color[i],dst->linesize[i] *
+                   (padbottom >> y_shift) + (padright >> x_shift));
+        }
+    }
+
+    return 0;
+}
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif /* FF_API_AVPICTURE */
diff --git a/media/ffvpx/libavcodec/internal.h b/media/ffvpx/libavcodec/internal.h
new file mode 100644
index 000000000..000fe263c
--- /dev/null
+++ b/media/ffvpx/libavcodec/internal.h
@@ -0,0 +1,363 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * common internal api header.
+ */
+
+#ifndef AVCODEC_INTERNAL_H
+#define AVCODEC_INTERNAL_H
+
+#include <stdint.h>
+
+#include "libavutil/buffer.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/mathematics.h"
+#include "libavutil/pixfmt.h"
+#include "avcodec.h"
+#include "config.h"
+
+/**
+ * The codec does not modify any global variables in the init function,
+ * allowing to call the init function without locking any global mutexes.
+ */
+#define FF_CODEC_CAP_INIT_THREADSAFE (1 << 0)
+/**
+ * The codec allows calling the close function for deallocation even if
+ * the init function returned a failure. Without this capability flag, a
+ * codec does such cleanup internally when returning failures from the
+ * init function and does not expect the close function to be called at
+ * all.
+ */
+#define FF_CODEC_CAP_INIT_CLEANUP (1 << 1)
+/**
+ * Decoders marked with FF_CODEC_CAP_SETS_PKT_DTS want to set
+ * AVFrame.pkt_dts manually. If the flag is set, utils.c won't overwrite
+ * this field. If it's unset, utils.c tries to guess the pkt_dts field
+ * from the input AVPacket.
+ */
+#define FF_CODEC_CAP_SETS_PKT_DTS (1 << 2)
+/**
+ * The decoder extracts and fills its parameters even if the frame is
+ * skipped due to the skip_frame setting.
+ */
+#define FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM (1 << 3)
+
+#ifdef TRACE
+# define ff_tlog(ctx, ...) av_log(ctx, AV_LOG_TRACE, __VA_ARGS__)
+#else
+# define ff_tlog(ctx, ...) do { } while(0)
+#endif
+
+
+#if !FF_API_QUANT_BIAS
+#define FF_DEFAULT_QUANT_BIAS 999999
+#endif
+
+#define FF_SANE_NB_CHANNELS 64U
+
+#define FF_SIGNBIT(x) ((x) >> CHAR_BIT * sizeof(x) - 1)
+
+#if HAVE_AVX
+# define STRIDE_ALIGN 32
+#elif HAVE_SIMD_ALIGN_16
+# define STRIDE_ALIGN 16
+#else
+# define STRIDE_ALIGN 8
+#endif
+
/**
 * State for internal frame-buffer pooling (referenced from
 * AVCodecInternal.pool). The scalar members record the parameters that the
 * currently pooled buffers correspond to.
 */
typedef struct FramePool {
    /**
     * Pools for each data plane. For audio all the planes have the same size,
     * so only pools[0] is used.
     */
    AVBufferPool *pools[4];

    /*
     * Pool parameters
     */
    int format;                             ///< frame format of the pooled buffers
    int width, height;                      ///< video dimensions
    int stride_align[AV_NUM_DATA_POINTERS]; ///< per-plane stride alignment
    int linesize[4];                        ///< per-plane line size in bytes
    int planes;                             ///< number of data planes
    int channels;                           ///< audio channel count
    int samples;                            ///< audio samples per frame
} FramePool;
+
/**
 * Private, libavcodec-internal per-context state, hung off AVCodecContext.
 */
typedef struct AVCodecInternal {
    /**
     * Whether the parent AVCodecContext is a copy of the context which had
     * init() called on it.
     * This is used by multithreading - shared tables and picture pointers
     * should be freed from the original context only.
     */
    int is_copy;

    /**
     * Whether to allocate progress for frame threading.
     *
     * The codec must set it to 1 if it uses ff_thread_await/report_progress(),
     * then progress will be allocated in ff_thread_get_buffer(). The frames
     * then MUST be freed with ff_thread_release_buffer().
     *
     * If the codec does not need to call the progress functions (there are no
     * dependencies between the frames), it should leave this at 0. Then it can
     * decode straight to the user-provided frames (which the user will then
     * free with av_frame_unref()), there is no need to call
     * ff_thread_release_buffer().
     */
    int allocate_progress;

    /**
     * An audio frame with less than required samples has been submitted and
     * padded with silence. Reject all subsequent frames.
     */
    int last_audio_frame;

    /**
     * NOTE(review): appears to hold a frame pending release by the generic
     * decode code — confirm exact ownership in utils.c.
     */
    AVFrame *to_free;

    /**
     * Internal frame-buffer pool, see FramePool in this header.
     */
    FramePool *pool;

    /**
     * Private data of the active threading implementation
     * (presumably frame/slice threading state — confirm against
     * pthread_frame.c / pthread_slice.c).
     */
    void *thread_ctx;

    /**
     * Current packet as passed into the decoder, to avoid having to pass the
     * packet into every function.
     */
    AVPacket *pkt;

    /**
     * temporary buffer used for encoders to store their bitstream
     */
    uint8_t *byte_buffer;
    unsigned int byte_buffer_size; ///< allocated size of byte_buffer

    /**
     * NOTE(review): presumably state owned by the frame-threaded encoder
     * (see frame_thread_encoder.h) — confirm.
     */
    void *frame_thread_encoder;

    /**
     * Number of audio samples to skip at the start of the next decoded frame
     */
    int skip_samples;

    /**
     * hwaccel-specific private data
     */
    void *hwaccel_priv_data;

    /**
     * checks API usage: after codec draining, flush is required to resume operation
     */
    int draining;

    /**
     * buffers for using new encode/decode API through legacy API
     */
    AVPacket *buffer_pkt;
    int buffer_pkt_valid; // encoding: packet without data can be valid
    AVFrame *buffer_frame;
    int draining_done;    // presumably set once draining has completed — confirm
} AVCodecInternal;
+
+struct AVCodecDefault {
+ const uint8_t *key;
+ const uint8_t *value;
+};
+
+extern const uint8_t ff_log2_run[41];
+
+/**
+ * Return the index into tab at which {a,b} match elements {[0],[1]} of tab.
+ * If there is no such matching pair then size is returned.
+ */
+int ff_match_2uint16(const uint16_t (*tab)[2], int size, int a, int b);
+
+unsigned int avpriv_toupper4(unsigned int x);
+
+/**
+ * does needed setup of pkt_pts/pos and such for (re)get_buffer();
+ */
+int ff_init_buffer_info(AVCodecContext *s, AVFrame *frame);
+
+
+void ff_color_frame(AVFrame *frame, const int color[4]);
+
+extern volatile int ff_avcodec_locked;
+int ff_lock_avcodec(AVCodecContext *log_ctx, const AVCodec *codec);
+int ff_unlock_avcodec(const AVCodec *codec);
+
+int avpriv_lock_avformat(void);
+int avpriv_unlock_avformat(void);
+
+/**
+ * Maximum size in bytes of extradata.
+ * This value was chosen such that every bit of the buffer is
+ * addressable by a 32-bit signed integer as used by get_bits.
+ */
+#define FF_MAX_EXTRADATA_SIZE ((1 << 28) - AV_INPUT_BUFFER_PADDING_SIZE)
+
+/**
+ * Check AVPacket size and/or allocate data.
+ *
+ * Encoders supporting AVCodec.encode2() can use this as a convenience to
+ * ensure the output packet data is large enough, whether provided by the user
+ * or allocated in this function.
+ *
+ * @param avctx the AVCodecContext of the encoder
+ * @param avpkt the AVPacket
+ * If avpkt->data is already set, avpkt->size is checked
+ * to ensure it is large enough.
+ * If avpkt->data is NULL, a new buffer is allocated.
+ * avpkt->size is set to the specified size.
+ * All other AVPacket fields will be reset with av_init_packet().
+ * @param size the minimum required packet size
+ * @param min_size This is a hint to the allocation algorithm, which indicates
+ * to what minimal size the caller might later shrink the packet
+ * to. Encoders often allocate packets which are larger than the
+ * amount of data that is written into them as the exact amount is
+ * not known at the time of allocation. min_size represents the
+ * size a packet might be shrunk to by the caller. Can be set to
+ * 0. setting this roughly correctly allows the allocation code
+ * to choose between several allocation strategies to improve
+ * speed slightly.
+ * @return non negative on success, negative error code on failure
+ */
+int ff_alloc_packet2(AVCodecContext *avctx, AVPacket *avpkt, int64_t size, int64_t min_size);
+
+attribute_deprecated int ff_alloc_packet(AVPacket *avpkt, int size);
+
+/**
+ * Rescale from sample rate to AVCodecContext.time_base.
+ */
+static av_always_inline int64_t ff_samples_to_time_base(AVCodecContext *avctx,
+ int64_t samples)
+{
+ if(samples == AV_NOPTS_VALUE)
+ return AV_NOPTS_VALUE;
+ return av_rescale_q(samples, (AVRational){ 1, avctx->sample_rate },
+ avctx->time_base);
+}
+
+/**
+ * 2^(x) for integer x
+ * @return correctly rounded float
+ */
+static av_always_inline float ff_exp2fi(int x) {
+ /* Normal range */
+ if (-126 <= x && x <= 128)
+ return av_int2float(x+127 << 23);
+ /* Too large */
+ else if (x > 128)
+ return INFINITY;
+ /* Subnormal numbers */
+ else if (x > -150)
+ return av_int2float(1 << (x+149));
+ /* Negligibly small */
+ else
+ return 0;
+}
+
+/**
+ * Get a buffer for a frame. This is a wrapper around
+ * AVCodecContext.get_buffer() and should be used instead calling get_buffer()
+ * directly.
+ */
+int ff_get_buffer(AVCodecContext *avctx, AVFrame *frame, int flags);
+
+/**
+ * Identical in function to av_frame_make_writable(), except it uses
+ * ff_get_buffer() to allocate the buffer when needed.
+ */
+int ff_reget_buffer(AVCodecContext *avctx, AVFrame *frame);
+
+int ff_thread_can_start_frame(AVCodecContext *avctx);
+
+int avpriv_h264_has_num_reorder_frames(AVCodecContext *avctx);
+
+/**
+ * Call avcodec_open2 recursively by decrementing counter, unlocking mutex,
+ * calling the function and then restoring again. Assumes the mutex is
+ * already locked
+ */
+int ff_codec_open2_recursive(AVCodecContext *avctx, const AVCodec *codec, AVDictionary **options);
+
+/**
+ * Finalize buf into extradata and set its size appropriately.
+ */
+int avpriv_bprint_to_extradata(AVCodecContext *avctx, struct AVBPrint *buf);
+
+const uint8_t *avpriv_find_start_code(const uint8_t *p,
+ const uint8_t *end,
+ uint32_t *state);
+
+int avpriv_codec_get_cap_skip_frame_fill_param(const AVCodec *codec);
+
+/**
+ * Check that the provided frame dimensions are valid and set them on the codec
+ * context.
+ */
+int ff_set_dimensions(AVCodecContext *s, int width, int height);
+
+/**
+ * Check that the provided sample aspect ratio is valid and set it on the codec
+ * context.
+ */
+int ff_set_sar(AVCodecContext *avctx, AVRational sar);
+
+/**
+ * Add or update AV_FRAME_DATA_MATRIXENCODING side data.
+ */
+int ff_side_data_update_matrix_encoding(AVFrame *frame,
+ enum AVMatrixEncoding matrix_encoding);
+
+/**
+ * Select the (possibly hardware accelerated) pixel format.
+ * This is a wrapper around AVCodecContext.get_format() and should be used
+ * instead of calling get_format() directly.
+ */
+int ff_get_format(AVCodecContext *avctx, const enum AVPixelFormat *fmt);
+
+/**
+ * Set various frame properties from the codec context / packet data.
+ */
+int ff_decode_frame_props(AVCodecContext *avctx, AVFrame *frame);
+
+/**
+ * Add a CPB properties side data to an encoding context.
+ */
+AVCPBProperties *ff_add_cpb_side_data(AVCodecContext *avctx);
+
+int ff_side_data_set_encoder_stats(AVPacket *pkt, int quality, int64_t *error, int error_count, int pict_type);
+
+/**
+ * Check AVFrame for A53 side data and allocate and fill SEI message with A53 info
+ *
+ * @param frame Raw frame to get A53 side data from
+ * @param prefix_len Number of bytes to allocate before SEI message
+ * @param data Pointer to a variable to store allocated memory
+ * Upon return the variable will hold NULL on error or if frame has no A53 info.
+ * Otherwise it will point to prefix_len uninitialized bytes followed by
+ * *sei_size SEI message
+ * @param sei_size Pointer to a variable to store generated SEI message length
+ * @return Zero on success, negative error code on failure
+ */
+int ff_alloc_a53_sei(const AVFrame *frame, size_t prefix_len,
+ void **data, size_t *sei_size);
+
+#endif /* AVCODEC_INTERNAL_H */
diff --git a/media/ffvpx/libavcodec/log2_tab.c b/media/ffvpx/libavcodec/log2_tab.c
new file mode 100644
index 000000000..47a1df03b
--- /dev/null
+++ b/media/ffvpx/libavcodec/log2_tab.c
@@ -0,0 +1 @@
+#include "libavutil/log2_tab.c"
diff --git a/media/ffvpx/libavcodec/mathops.h b/media/ffvpx/libavcodec/mathops.h
new file mode 100644
index 000000000..5168dc2ce
--- /dev/null
+++ b/media/ffvpx/libavcodec/mathops.h
@@ -0,0 +1,252 @@
+/*
+ * simple math operations
+ * Copyright (c) 2001, 2002 Fabrice Bellard
+ * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef AVCODEC_MATHOPS_H
+#define AVCODEC_MATHOPS_H
+
+#include <stdint.h>
+
+#include "libavutil/common.h"
+#include "config.h"
+
+#define MAX_NEG_CROP 1024
+
+extern const uint32_t ff_inverse[257];
+extern const uint8_t ff_sqrt_tab[256];
+extern const uint8_t ff_crop_tab[256 + 2 * MAX_NEG_CROP];
+extern const uint8_t ff_zigzag_direct[64];
+extern const uint8_t ff_zigzag_scan[16+1];
+
+#if ARCH_ARM
+# include "arm/mathops.h"
+#elif ARCH_AVR32
+# include "avr32/mathops.h"
+#elif ARCH_MIPS
+# include "mips/mathops.h"
+#elif ARCH_PPC
+# include "ppc/mathops.h"
+#elif ARCH_X86
+# include "x86/mathops.h"
+#endif
+
+/* generic implementation */
+
+#ifndef MUL64
+# define MUL64(a,b) ((int64_t)(a) * (int64_t)(b))
+#endif
+
+#ifndef MULL
+# define MULL(a,b,s) (MUL64(a, b) >> (s))
+#endif
+
+#ifndef MULH
+static av_always_inline int MULH(int a, int b){
+ return MUL64(a, b) >> 32;
+}
+#endif
+
+#ifndef UMULH
+static av_always_inline unsigned UMULH(unsigned a, unsigned b){
+ return ((uint64_t)(a) * (uint64_t)(b))>>32;
+}
+#endif
+
+#ifndef MAC64
+# define MAC64(d, a, b) ((d) += MUL64(a, b))
+#endif
+
+#ifndef MLS64
+# define MLS64(d, a, b) ((d) -= MUL64(a, b))
+#endif
+
+/* signed 16x16 -> 32 multiply add accumulate */
+#ifndef MAC16
+# define MAC16(rt, ra, rb) rt += (ra) * (rb)
+#endif
+
+/* signed 16x16 -> 32 multiply */
+#ifndef MUL16
+# define MUL16(ra, rb) ((ra) * (rb))
+#endif
+
+#ifndef MLS16
+# define MLS16(rt, ra, rb) ((rt) -= (ra) * (rb))
+#endif
+
+/* median of 3 */
+#ifndef mid_pred
+#define mid_pred mid_pred
+static inline av_const int mid_pred(int a, int b, int c)
+{
+#if 0
+ int t= (a-b)&((a-b)>>31);
+ a-=t;
+ b+=t;
+ b-= (b-c)&((b-c)>>31);
+ b+= (a-b)&((a-b)>>31);
+
+ return b;
+#else
+ if(a>b){
+ if(c>b){
+ if(c>a) b=a;
+ else b=c;
+ }
+ }else{
+ if(b>c){
+ if(c>a) b=c;
+ else b=a;
+ }
+ }
+ return b;
+#endif
+}
+#endif
+
+#ifndef median4
+#define median4 median4
+static inline av_const int median4(int a, int b, int c, int d)
+{
+ if (a < b) {
+ if (c < d) return (FFMIN(b, d) + FFMAX(a, c)) / 2;
+ else return (FFMIN(b, c) + FFMAX(a, d)) / 2;
+ } else {
+ if (c < d) return (FFMIN(a, d) + FFMAX(b, c)) / 2;
+ else return (FFMIN(a, c) + FFMAX(b, d)) / 2;
+ }
+}
+#endif
+
+#ifndef sign_extend
+static inline av_const int sign_extend(int val, unsigned bits)
+{
+ unsigned shift = 8 * sizeof(int) - bits;
+ union { unsigned u; int s; } v = { (unsigned) val << shift };
+ return v.s >> shift;
+}
+#endif
+
+#ifndef zero_extend
+static inline av_const unsigned zero_extend(unsigned val, unsigned bits)
+{
+ return (val << ((8 * sizeof(int)) - bits)) >> ((8 * sizeof(int)) - bits);
+}
+#endif
+
+#ifndef COPY3_IF_LT
+#define COPY3_IF_LT(x, y, a, b, c, d)\
+if ((y) < (x)) {\
+ (x) = (y);\
+ (a) = (b);\
+ (c) = (d);\
+}
+#endif
+
+#ifndef MASK_ABS
+#define MASK_ABS(mask, level) do { \
+ mask = level >> 31; \
+ level = (level ^ mask) - mask; \
+ } while (0)
+#endif
+
+#ifndef NEG_SSR32
+# define NEG_SSR32(a,s) ((( int32_t)(a))>>(32-(s)))
+#endif
+
+#ifndef NEG_USR32
+# define NEG_USR32(a,s) (((uint32_t)(a))>>(32-(s)))
+#endif
+
/* Byte/halfword packing helpers in native byte order. Each guard must name
 * the macro it defines: the little-endian PACK_4U8 was guarded by a
 * misspelled "#ifndef PACK_4U2", so an arch-provided PACK_4U8 override
 * would have been redefined here. Fixed to PACK_4U8. */
#if HAVE_BIGENDIAN
# ifndef PACK_2U8
# define PACK_2U8(a,b) (((a) << 8) | (b))
# endif
# ifndef PACK_4U8
# define PACK_4U8(a,b,c,d) (((a) << 24) | ((b) << 16) | ((c) << 8) | (d))
# endif
# ifndef PACK_2U16
# define PACK_2U16(a,b) (((a) << 16) | (b))
# endif
#else
# ifndef PACK_2U8
# define PACK_2U8(a,b) (((b) << 8) | (a))
# endif
# ifndef PACK_4U8
# define PACK_4U8(a,b,c,d) (((d) << 24) | ((c) << 16) | ((b) << 8) | (a))
# endif
# ifndef PACK_2U16
# define PACK_2U16(a,b) (((b) << 16) | (a))
# endif
#endif
+
+#ifndef PACK_2S8
+# define PACK_2S8(a,b) PACK_2U8((a)&255, (b)&255)
+#endif
+#ifndef PACK_4S8
+# define PACK_4S8(a,b,c,d) PACK_4U8((a)&255, (b)&255, (c)&255, (d)&255)
+#endif
+#ifndef PACK_2S16
+# define PACK_2S16(a,b) PACK_2U16((a)&0xffff, (b)&0xffff)
+#endif
+
+#ifndef FASTDIV
+# define FASTDIV(a,b) ((uint32_t)((((uint64_t)a) * ff_inverse[b]) >> 32))
+#endif /* FASTDIV */
+
#ifndef ff_sqrt
#define ff_sqrt ff_sqrt
/**
 * Integer square root: floor(sqrt(a)).
 *
 * Small inputs are answered directly from ff_sqrt_tab; larger inputs take a
 * table-based initial estimate and refine it with one division step.
 */
static inline av_const unsigned int ff_sqrt(unsigned int a)
{
    unsigned int b;

    if (a < 255) return (ff_sqrt_tab[a + 1] - 1) >> 4;
    else if (a < (1 << 12)) b = ff_sqrt_tab[a >> 4] >> 2;
#if !CONFIG_SMALL
    /* extra table-lookup ranges, traded away for code size in CONFIG_SMALL */
    else if (a < (1 << 14)) b = ff_sqrt_tab[a >> 6] >> 1;
    else if (a < (1 << 16)) b = ff_sqrt_tab[a >> 8] ;
#endif
    else {
        /* scale into table range, look up an estimate, then refine once:
         * b ≈ (c/b + b*2^s) combines the estimate with a division step */
        int s = av_log2_16bit(a >> 16) >> 1;
        unsigned int c = a >> (s + 2);
        b = ff_sqrt_tab[c >> (s + 8)];
        b = FASTDIV(c,b) + (b << s);
    }

    /* the estimate can be one too high; (a < b*b) subtracts the correction */
    return b - (a < b * b);
}
#endif
+
/**
 * Square of a float: a*a. (Despite the name's similarity to ff_sqrt, this
 * is the square, not the square root.)
 */
static inline av_const float ff_sqrf(float a)
{
    return a*a;
}
+
/**
 * Reinterpret the bits of an unsigned byte as a signed byte (two's
 * complement), punning through a union rather than casting, so the result
 * does not depend on implementation-defined narrowing conversion.
 */
static inline int8_t ff_u8_to_s8(uint8_t a)
{
    union {
        uint8_t u8;
        int8_t  s8;
    } pun = { .u8 = a };

    return pun.s8;
}
+
+#endif /* AVCODEC_MATHOPS_H */
diff --git a/media/ffvpx/libavcodec/mathtables.c b/media/ffvpx/libavcodec/mathtables.c
new file mode 100644
index 000000000..81eabc7a6
--- /dev/null
+++ b/media/ffvpx/libavcodec/mathtables.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "mathops.h"
+
+/* a*inverse[b]>>32 == a/b for all 0<=a<=16909558 && 2<=b<=256
+ * for a>16909558, is an overestimate by less than 1 part in 1<<24 */
+const uint32_t ff_inverse[257]={
+ 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757,
+ 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154,
+ 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709,
+ 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333,
+ 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367,
+ 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283,
+ 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315,
+ 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085,
+ 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498,
+ 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675,
+ 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441,
+ 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183,
+ 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712,
+ 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400,
+ 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163,
+ 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641,
+ 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573,
+ 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737,
+ 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493,
+ 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373,
+ 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368,
+ 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671,
+ 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767,
+ 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740,
+ 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751,
+ 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635,
+ 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593,
+ 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944,
+ 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933,
+ 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575,
+ 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532,
+ 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010,
+ 16777216
+};
+
+const uint8_t ff_sqrt_tab[256]={
+ 0, 16, 23, 28, 32, 36, 40, 43, 46, 48, 51, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 77, 79, 80, 82, 84, 85, 87, 88, 90,
+ 91, 92, 94, 95, 96, 98, 99,100,102,103,104,105,107,108,109,110,111,112,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
+128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,144,145,146,147,148,149,150,151,151,152,153,154,155,156,156,
+157,158,159,160,160,161,162,163,164,164,165,166,167,168,168,169,170,171,171,172,173,174,174,175,176,176,177,178,179,179,180,181,
+182,182,183,184,184,185,186,186,187,188,188,189,190,190,191,192,192,193,194,194,195,196,196,197,198,198,199,200,200,201,202,202,
+203,204,204,205,205,206,207,207,208,208,209,210,210,211,212,212,213,213,214,215,215,216,216,217,218,218,219,219,220,220,221,222,
+222,223,223,224,224,225,226,226,227,227,228,228,229,230,230,231,231,232,232,233,233,234,235,235,236,236,237,237,238,238,239,239,
+240,240,241,242,242,243,243,244,244,245,245,246,246,247,247,248,248,249,249,250,250,251,251,252,252,253,253,254,254,255,255,255
+};
+
+#define times4(x) x, x, x, x
+#define times256(x) times4(times4(times4(times4(times4(x)))))
+
+const uint8_t ff_crop_tab[256 + 2 * MAX_NEG_CROP] = {
+times256(0x00),
+0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
+0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
+0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
+0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
+0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
+0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
+0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
+0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
+0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
+0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
+0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
+0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
+0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
+0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
+0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
+0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
+times256(0xFF)
+};
+
/* 8x8 zigzag scan order: entry i is the raster index (row * 8 + column) of
 * the coefficient visited at scan position i. */
const uint8_t ff_zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
+
/* 4x4 zigzag scan order, written as column + row * 4. Declared 16+1: one
 * zero-initialized padding entry follows the 16 scan positions. */
const uint8_t ff_zigzag_scan[16+1] = {
    0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4,
    1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4,
    1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4,
    3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4,
};
diff --git a/media/ffvpx/libavcodec/me_cmp.h b/media/ffvpx/libavcodec/me_cmp.h
new file mode 100644
index 000000000..a3603ec2c
--- /dev/null
+++ b/media/ffvpx/libavcodec/me_cmp.h
@@ -0,0 +1,96 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_ME_CMP_H
+#define AVCODEC_ME_CMP_H
+
+#include <stdint.h>
+
+#include "avcodec.h"
+
+extern uint32_t ff_square_tab[512];
+
+
+/* minimum alignment rules ;)
+ * If you notice errors in the align stuff, need more alignment for some ASM code
+ * for some CPU or need to use a function with less aligned data then send a mail
+ * to the ffmpeg-devel mailing list, ...
+ *
+ * !warning These alignments might not match reality, (missing attribute((align))
+ * stuff somewhere possible).
+ * I (Michael) did not check them, these are just the alignments which I think
+ * could be reached easily ...
+ *
+ * !future video codecs might need functions with less strict alignment
+ */
+
+struct MpegEncContext;
+/* Motion estimation:
+ * h is limited to { width / 2, width, 2 * width },
+ * but never larger than 16 and never smaller than 2.
+ * Although currently h < 4 is not used as functions with
+ * width < 8 are neither used nor implemented. */
+typedef int (*me_cmp_func)(struct MpegEncContext *c,
+ uint8_t *blk1 /* align width (8 or 16) */,
+ uint8_t *blk2 /* align 1 */, ptrdiff_t stride,
+ int h);
+
+typedef struct MECmpContext {
+ int (*sum_abs_dctelem)(int16_t *block /* align 16 */);
+
+ me_cmp_func sad[6]; /* identical to pix_absAxA except additional void * */
+ me_cmp_func sse[6];
+ me_cmp_func hadamard8_diff[6];
+ me_cmp_func dct_sad[6];
+ me_cmp_func quant_psnr[6];
+ me_cmp_func bit[6];
+ me_cmp_func rd[6];
+ me_cmp_func vsad[6];
+ me_cmp_func vsse[6];
+ me_cmp_func nsse[6];
+ me_cmp_func w53[6];
+ me_cmp_func w97[6];
+ me_cmp_func dct_max[6];
+ me_cmp_func dct264_sad[6];
+
+ me_cmp_func me_pre_cmp[6];
+ me_cmp_func me_cmp[6];
+ me_cmp_func me_sub_cmp[6];
+ me_cmp_func mb_cmp[6];
+ me_cmp_func ildct_cmp[6]; // only width 16 used
+ me_cmp_func frame_skip_cmp[6]; // only width 8 used
+
+ me_cmp_func pix_abs[2][4];
+} MECmpContext;
+
+void ff_me_cmp_init_static(void);
+
+int ff_check_alignment(void);
+
+void ff_me_cmp_init(MECmpContext *c, AVCodecContext *avctx);
+void ff_me_cmp_init_alpha(MECmpContext *c, AVCodecContext *avctx);
+void ff_me_cmp_init_arm(MECmpContext *c, AVCodecContext *avctx);
+void ff_me_cmp_init_ppc(MECmpContext *c, AVCodecContext *avctx);
+void ff_me_cmp_init_x86(MECmpContext *c, AVCodecContext *avctx);
+void ff_me_cmp_init_mips(MECmpContext *c, AVCodecContext *avctx);
+
+void ff_set_cmp(MECmpContext *c, me_cmp_func *cmp, int type);
+
+void ff_dsputil_init_dwt(MECmpContext *c);
+
+#endif /* AVCODEC_ME_CMP_H */
diff --git a/media/ffvpx/libavcodec/motion_est.h b/media/ffvpx/libavcodec/motion_est.h
new file mode 100644
index 000000000..3b3a8d734
--- /dev/null
+++ b/media/ffvpx/libavcodec/motion_est.h
@@ -0,0 +1,135 @@
+/*
+ * Motion estimation
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MOTION_EST_H
+#define AVCODEC_MOTION_EST_H
+
+#include <stdint.h>
+
+#include "avcodec.h"
+#include "hpeldsp.h"
+#include "qpeldsp.h"
+
+struct MpegEncContext;
+
+#if ARCH_IA64 // Limit static arrays to avoid gcc failing "short data segment overflowed"
+#define MAX_MV 1024
+#else
+#define MAX_MV 4096
+#endif
+#define MAX_DMV (2*MAX_MV)
+#define ME_MAP_SIZE 64
+
+#define FF_ME_ZERO 0
+#define FF_ME_EPZS 1
+#define FF_ME_XONE 2
+
+/**
+ * Motion estimation context.
+ */
+typedef struct MotionEstContext {
+ AVCodecContext *avctx;
+ int skip; ///< set if ME is skipped for the current MB
+ int co_located_mv[4][2]; ///< mv from last P-frame for direct mode ME
+ int direct_basis_mv[4][2];
+ uint8_t *scratchpad; /**< data area for the ME algo, so that
+ * the ME does not need to malloc/free. */
+ uint8_t *best_mb;
+ uint8_t *temp_mb[2];
+ uint8_t *temp;
+ int best_bits;
+ uint32_t *map; ///< map to avoid duplicate evaluations
+ uint32_t *score_map; ///< map to store the scores
+ unsigned map_generation;
+ int pre_penalty_factor;
+ int penalty_factor; /**< an estimate of the bits required to
+ * code a given mv value, e.g. (1,0) takes
+ * more bits than (0,0). We have to
+ * estimate whether any reduction in
+ * residual is worth the extra bits. */
+ int sub_penalty_factor;
+ int mb_penalty_factor;
+ int flags;
+ int sub_flags;
+ int mb_flags;
+ int pre_pass; ///< = 1 for the pre pass
+ int dia_size;
+ int xmin;
+ int xmax;
+ int ymin;
+ int ymax;
+ int pred_x;
+ int pred_y;
+ uint8_t *src[4][4];
+ uint8_t *ref[4][4];
+ int stride;
+ int uvstride;
+ /* temp variables for picture complexity calculation */
+ int64_t mc_mb_var_sum_temp;
+ int64_t mb_var_sum_temp;
+ int scene_change_score;
+
+ op_pixels_func(*hpel_put)[4];
+ op_pixels_func(*hpel_avg)[4];
+ qpel_mc_func(*qpel_put)[16];
+ qpel_mc_func(*qpel_avg)[16];
+ uint8_t (*mv_penalty)[MAX_DMV * 2 + 1]; ///< bit amount needed to encode a MV
+ uint8_t *current_mv_penalty;
+ int (*sub_motion_search)(struct MpegEncContext *s,
+ int *mx_ptr, int *my_ptr, int dmin,
+ int src_index, int ref_index,
+ int size, int h);
+} MotionEstContext;
+
+static inline int ff_h263_round_chroma(int x)
+{
+ //FIXME static or not?
+ static const uint8_t h263_chroma_roundtab[16] = {
+ // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+ 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1,
+ };
+ return h263_chroma_roundtab[x & 0xf] + (x >> 3);
+}
+
+int ff_init_me(struct MpegEncContext *s);
+
+void ff_estimate_p_frame_motion(struct MpegEncContext *s, int mb_x, int mb_y);
+void ff_estimate_b_frame_motion(struct MpegEncContext *s, int mb_x, int mb_y);
+
+int ff_pre_estimate_p_frame_motion(struct MpegEncContext *s,
+ int mb_x, int mb_y);
+
+int ff_epzs_motion_search(struct MpegEncContext *s, int *mx_ptr, int *my_ptr,
+ int P[10][2], int src_index, int ref_index,
+ int16_t (*last_mv)[2], int ref_mv_scale, int size,
+ int h);
+
+int ff_get_mb_score(struct MpegEncContext *s, int mx, int my, int src_index,
+ int ref_index, int size, int h, int add_rate);
+
+int ff_get_best_fcode(struct MpegEncContext *s,
+ int16_t (*mv_table)[2], int type);
+
+void ff_fix_long_p_mvs(struct MpegEncContext *s);
+void ff_fix_long_mvs(struct MpegEncContext *s, uint8_t *field_select_table,
+ int field_select, int16_t (*mv_table)[2], int f_code,
+ int type, int truncate);
+
+#endif /* AVCODEC_MOTION_EST_H */
diff --git a/media/ffvpx/libavcodec/moz.build b/media/ffvpx/libavcodec/moz.build
new file mode 100644
index 000000000..443bbe978
--- /dev/null
+++ b/media/ffvpx/libavcodec/moz.build
@@ -0,0 +1,65 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Due to duplicate file names, we compile libavcodec/x86 in its own
+# moz.build file.
+if CONFIG['FFVPX_ASFLAGS']:
+ DIRS += ['x86']
+
+SharedLibrary('mozavcodec')
+SOURCES += [
+ 'allcodecs.c',
+ 'audioconvert.c',
+ 'avpacket.c',
+ 'avpicture.c',
+ 'bitstream.c',
+ 'codec_desc.c',
+ 'dummy_funcs.c',
+ 'flac.c',
+ 'flac_parser.c',
+ 'flacdata.c',
+ 'flacdec.c',
+ 'flacdsp.c',
+ 'golomb.c',
+ 'h264pred.c',
+ 'imgconvert.c',
+ 'log2_tab.c',
+ 'mathtables.c',
+ 'options.c',
+ 'parser.c',
+ 'profiles.c',
+ 'pthread.c',
+ 'pthread_frame.c',
+ 'pthread_slice.c',
+ 'qsv_api.c',
+ 'raw.c',
+ 'resample.c',
+ 'resample2.c',
+ 'reverse.c',
+ 'utils.c',
+ 'videodsp.c',
+ 'vorbis_parser.c',
+ 'vp56rac.c',
+ 'vp8.c',
+ 'vp8_parser.c',
+ 'vp8dsp.c',
+ 'vp9.c',
+ 'vp9_parser.c',
+ 'vp9dsp.c',
+ 'vp9dsp_10bpp.c',
+ 'vp9dsp_12bpp.c',
+ 'vp9dsp_8bpp.c',
+ 'xiph.c'
+]
+
+SYMBOLS_FILE = 'avcodec.symbols'
+NO_VISIBILITY_FLAGS = True
+
+USE_LIBS += [
+ 'mozavutil'
+]
+
+include("../ffvpxcommon.mozbuild")
diff --git a/media/ffvpx/libavcodec/mpeg12data.h b/media/ffvpx/libavcodec/mpeg12data.h
new file mode 100644
index 000000000..f51faf460
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpeg12data.h
@@ -0,0 +1,57 @@
+/*
+ * MPEG-1/2 tables
+ * copyright (c) 2000,2001 Fabrice Bellard
+ * copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * MPEG-1/2 tables.
+ */
+
+#ifndef AVCODEC_MPEG12DATA_H
+#define AVCODEC_MPEG12DATA_H
+
+#include <stdint.h>
+#include "libavutil/rational.h"
+#include "rl.h"
+
+extern const uint16_t ff_mpeg1_default_intra_matrix[];
+extern const uint16_t ff_mpeg1_default_non_intra_matrix[64];
+
+extern const uint16_t ff_mpeg12_vlc_dc_lum_code[12];
+extern const unsigned char ff_mpeg12_vlc_dc_lum_bits[12];
+extern const uint16_t ff_mpeg12_vlc_dc_chroma_code[12];
+extern const unsigned char ff_mpeg12_vlc_dc_chroma_bits[12];
+
+extern RLTable ff_rl_mpeg1;
+extern RLTable ff_rl_mpeg2;
+
+extern const uint8_t ff_mpeg12_mbAddrIncrTable[36][2];
+extern const uint8_t ff_mpeg12_mbPatTable[64][2];
+
+extern const uint8_t ff_mpeg12_mbMotionVectorTable[17][2];
+
+extern const AVRational ff_mpeg12_frame_rate_tab[];
+extern const AVRational ff_mpeg2_frame_rate_tab[];
+
+extern const float ff_mpeg1_aspect[16];
+extern const AVRational ff_mpeg2_aspect[16];
+
+#endif /* AVCODEC_MPEG12DATA_H */
diff --git a/media/ffvpx/libavcodec/mpegpicture.h b/media/ffvpx/libavcodec/mpegpicture.h
new file mode 100644
index 000000000..2db3d6733
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpegpicture.h
@@ -0,0 +1,114 @@
+/*
+ * Mpeg video formats-related defines and utility functions
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MPEGPICTURE_H
+#define AVCODEC_MPEGPICTURE_H
+
+#include <stdint.h>
+
+#include "libavutil/frame.h"
+
+#include "avcodec.h"
+#include "motion_est.h"
+#include "thread.h"
+
+#define MAX_PICTURE_COUNT 36
+#define EDGE_WIDTH 16
+
+typedef struct ScratchpadContext {
+ uint8_t *edge_emu_buffer; ///< temporary buffer for if MVs point to out-of-frame data
+ uint8_t *rd_scratchpad; ///< scratchpad for rate distortion mb decision
+ uint8_t *obmc_scratchpad;
+ uint8_t *b_scratchpad; ///< scratchpad used for writing into write only buffers
+} ScratchpadContext;
+
+/**
+ * Picture.
+ */
+typedef struct Picture {
+ struct AVFrame *f;
+ ThreadFrame tf;
+
+ AVBufferRef *qscale_table_buf;
+ int8_t *qscale_table;
+
+ AVBufferRef *motion_val_buf[2];
+ int16_t (*motion_val[2])[2];
+
+ AVBufferRef *mb_type_buf;
+ uint32_t *mb_type; ///< types and macros are defined in mpegutils.h
+
+ AVBufferRef *mbskip_table_buf;
+ uint8_t *mbskip_table;
+
+ AVBufferRef *ref_index_buf[2];
+ int8_t *ref_index[2];
+
+ AVBufferRef *mb_var_buf;
+ uint16_t *mb_var; ///< Table for MB variances
+
+ AVBufferRef *mc_mb_var_buf;
+ uint16_t *mc_mb_var; ///< Table for motion compensated MB variances
+
+ int alloc_mb_width; ///< mb_width used to allocate tables
+ int alloc_mb_height; ///< mb_height used to allocate tables
+
+ AVBufferRef *mb_mean_buf;
+ uint8_t *mb_mean; ///< Table for MB luminance
+
+ AVBufferRef *hwaccel_priv_buf;
+ void *hwaccel_picture_private; ///< Hardware accelerator private data
+
+ int field_picture; ///< whether or not the picture was encoded in separate fields
+
+ int64_t mb_var_sum; ///< sum of MB variance for current frame
+ int64_t mc_mb_var_sum; ///< motion compensated MB variance for current frame
+
+ int b_frame_score;
+ int needs_realloc; ///< Picture needs to be reallocated (eg due to a frame size change)
+
+ int reference;
+ int shared;
+
+ uint64_t encoding_error[AV_NUM_DATA_POINTERS];
+} Picture;
+
+/**
+ * Allocate a Picture.
+ * The pixels are allocated/set by calling get_buffer() if shared = 0.
+ */
+int ff_alloc_picture(AVCodecContext *avctx, Picture *pic, MotionEstContext *me,
+ ScratchpadContext *sc, int shared, int encoding,
+ int chroma_x_shift, int chroma_y_shift, int out_format,
+ int mb_stride, int mb_width, int mb_height, int b8_stride,
+ ptrdiff_t *linesize, ptrdiff_t *uvlinesize);
+
+int ff_mpeg_framesize_alloc(AVCodecContext *avctx, MotionEstContext *me,
+ ScratchpadContext *sc, int linesize);
+
+int ff_mpeg_ref_picture(AVCodecContext *avctx, Picture *dst, Picture *src);
+void ff_mpeg_unref_picture(AVCodecContext *avctx, Picture *picture);
+
+void ff_free_picture_tables(Picture *pic);
+int ff_update_picture_tables(Picture *dst, Picture *src);
+
+int ff_find_unused_picture(AVCodecContext *avctx, Picture *picture, int shared);
+
+#endif /* AVCODEC_MPEGPICTURE_H */
diff --git a/media/ffvpx/libavcodec/mpegutils.h b/media/ffvpx/libavcodec/mpegutils.h
new file mode 100644
index 000000000..9cfadfc4c
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpegutils.h
@@ -0,0 +1,142 @@
+/*
+ * Mpeg video formats-related defines and utility functions
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MPEGUTILS_H
+#define AVCODEC_MPEGUTILS_H
+
+#include <stdint.h>
+
+#include "libavutil/frame.h"
+
+#include "avcodec.h"
+#include "version.h"
+
+/**
+ * Return value for header parsers if frame is not coded.
+ */
+#define FRAME_SKIPPED 100
+
+/* picture type */
+#define PICT_TOP_FIELD 1
+#define PICT_BOTTOM_FIELD 2
+#define PICT_FRAME 3
+
+/**
+ * Value of Picture.reference when Picture is not a reference picture, but
+ * is held for delayed output.
+ */
+#define DELAYED_PIC_REF 4
+
+#define MAX_MB_BYTES (30 * 16 * 16 * 3 / 8 + 120)
+#define MAX_FCODE 7
+
+/* MB types */
+#if !FF_API_MB_TYPE
+#define MB_TYPE_INTRA4x4 (1 << 0)
+#define MB_TYPE_INTRA16x16 (1 << 1) // FIXME H.264-specific
+#define MB_TYPE_INTRA_PCM (1 << 2) // FIXME H.264-specific
+#define MB_TYPE_16x16 (1 << 3)
+#define MB_TYPE_16x8 (1 << 4)
+#define MB_TYPE_8x16 (1 << 5)
+#define MB_TYPE_8x8 (1 << 6)
+#define MB_TYPE_INTERLACED (1 << 7)
+#define MB_TYPE_DIRECT2 (1 << 8) // FIXME
+#define MB_TYPE_ACPRED (1 << 9)
+#define MB_TYPE_GMC (1 << 10)
+#define MB_TYPE_SKIP (1 << 11)
+#define MB_TYPE_P0L0 (1 << 12)
+#define MB_TYPE_P1L0 (1 << 13)
+#define MB_TYPE_P0L1 (1 << 14)
+#define MB_TYPE_P1L1 (1 << 15)
+#define MB_TYPE_L0 (MB_TYPE_P0L0 | MB_TYPE_P1L0)
+#define MB_TYPE_L1 (MB_TYPE_P0L1 | MB_TYPE_P1L1)
+#define MB_TYPE_L0L1 (MB_TYPE_L0 | MB_TYPE_L1)
+#define MB_TYPE_QUANT (1 << 16)
+#define MB_TYPE_CBP (1 << 17)
+#endif
+
+#define MB_TYPE_INTRA MB_TYPE_INTRA4x4 // default mb_type if there is just one type
+
+#define IS_INTRA4x4(a) ((a) & MB_TYPE_INTRA4x4)
+#define IS_INTRA16x16(a) ((a) & MB_TYPE_INTRA16x16)
+#define IS_PCM(a) ((a) & MB_TYPE_INTRA_PCM)
+#define IS_INTRA(a) ((a) & 7)
+#define IS_INTER(a) ((a) & (MB_TYPE_16x16 | MB_TYPE_16x8 | \
+ MB_TYPE_8x16 | MB_TYPE_8x8))
+#define IS_SKIP(a) ((a) & MB_TYPE_SKIP)
+#define IS_INTRA_PCM(a) ((a) & MB_TYPE_INTRA_PCM)
+#define IS_INTERLACED(a) ((a) & MB_TYPE_INTERLACED)
+#define IS_DIRECT(a) ((a) & MB_TYPE_DIRECT2)
+#define IS_GMC(a) ((a) & MB_TYPE_GMC)
+#define IS_16X16(a) ((a) & MB_TYPE_16x16)
+#define IS_16X8(a) ((a) & MB_TYPE_16x8)
+#define IS_8X16(a) ((a) & MB_TYPE_8x16)
+#define IS_8X8(a) ((a) & MB_TYPE_8x8)
+#define IS_SUB_8X8(a) ((a) & MB_TYPE_16x16) // note reused
+#define IS_SUB_8X4(a) ((a) & MB_TYPE_16x8) // note reused
+#define IS_SUB_4X8(a) ((a) & MB_TYPE_8x16) // note reused
+#define IS_SUB_4X4(a) ((a) & MB_TYPE_8x8) // note reused
+#define IS_ACPRED(a) ((a) & MB_TYPE_ACPRED)
+#define IS_QUANT(a) ((a) & MB_TYPE_QUANT)
+#define IS_DIR(a, part, list) ((a) & (MB_TYPE_P0L0 << ((part) + 2 * (list))))
+
+// does this mb use listX, note does not work if subMBs
+#define USES_LIST(a, list) ((a) & ((MB_TYPE_P0L0 | MB_TYPE_P1L0) << (2 * (list))))
+
+#define HAS_CBP(a) ((a) & MB_TYPE_CBP)
+
+/* MB types for encoding */
+#define CANDIDATE_MB_TYPE_INTRA (1 << 0)
+#define CANDIDATE_MB_TYPE_INTER (1 << 1)
+#define CANDIDATE_MB_TYPE_INTER4V (1 << 2)
+#define CANDIDATE_MB_TYPE_SKIPPED (1 << 3)
+
+#define CANDIDATE_MB_TYPE_DIRECT (1 << 4)
+#define CANDIDATE_MB_TYPE_FORWARD (1 << 5)
+#define CANDIDATE_MB_TYPE_BACKWARD (1 << 6)
+#define CANDIDATE_MB_TYPE_BIDIR (1 << 7)
+
+#define CANDIDATE_MB_TYPE_INTER_I (1 << 8)
+#define CANDIDATE_MB_TYPE_FORWARD_I (1 << 9)
+#define CANDIDATE_MB_TYPE_BACKWARD_I (1 << 10)
+#define CANDIDATE_MB_TYPE_BIDIR_I (1 << 11)
+
+#define CANDIDATE_MB_TYPE_DIRECT0 (1 << 12)
+
+#define INPLACE_OFFSET 16
+
+enum OutputFormat { // bitstream syntax family; stored in MpegEncContext.out_format
+ FMT_MPEG1,
+ FMT_H261,
+ FMT_H263,
+ FMT_MJPEG,
+};
+
+
+/**
+ * Draw a horizontal band if supported.
+ *
+ * @param h is the normal height, this will be reduced automatically if needed
+ */
+void ff_draw_horiz_band(AVCodecContext *avctx, AVFrame *cur, AVFrame *last,
+ int y, int h, int picture_structure, int first_field,
+ int low_delay);
+
+#endif /* AVCODEC_MPEGUTILS_H */
diff --git a/media/ffvpx/libavcodec/mpegvideo.h b/media/ffvpx/libavcodec/mpegvideo.h
new file mode 100644
index 000000000..a1f3d4bd9
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpegvideo.h
@@ -0,0 +1,752 @@
+/*
+ * Generic DCT based hybrid video encoder
+ * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * mpegvideo header.
+ */
+
+#ifndef AVCODEC_MPEGVIDEO_H
+#define AVCODEC_MPEGVIDEO_H
+
+#include <float.h>
+
+#include "avcodec.h"
+#include "blockdsp.h"
+#include "error_resilience.h"
+#include "fdctdsp.h"
+#include "get_bits.h"
+#include "h264chroma.h"
+#include "h263dsp.h"
+#include "hpeldsp.h"
+#include "idctdsp.h"
+#include "internal.h"
+#include "me_cmp.h"
+#include "motion_est.h"
+#include "mpegpicture.h"
+#include "mpegvideodsp.h"
+#include "mpegvideoencdsp.h"
+#include "pixblockdsp.h"
+#include "put_bits.h"
+#include "ratecontrol.h"
+#include "parser.h"
+#include "mpegutils.h"
+#include "mpeg12data.h"
+#include "qpeldsp.h"
+#include "thread.h"
+#include "videodsp.h"
+
+#include "libavutil/opt.h"
+#include "libavutil/timecode.h"
+
+#define MAX_THREADS 32
+
+#define MAX_B_FRAMES 16
+
+/* Start codes. */
+#define SEQ_END_CODE 0x000001b7
+#define SEQ_START_CODE 0x000001b3
+#define GOP_START_CODE 0x000001b8
+#define PICTURE_START_CODE 0x00000100
+#define SLICE_MIN_START_CODE 0x00000101
+#define SLICE_MAX_START_CODE 0x000001af
+#define EXT_START_CODE 0x000001b5
+#define USER_START_CODE 0x000001b2
+
+/**
+ * MpegEncContext.
+ */
+typedef struct MpegEncContext {
+ AVClass *class;
+
+ int y_dc_scale, c_dc_scale;
+ int ac_pred;
+ int block_last_index[12]; ///< last non zero coefficient in block
+ int h263_aic; ///< Advanced INTRA Coding (AIC)
+
+ /* scantables */
+ ScanTable inter_scantable; ///< if inter == intra then intra should be used to reduce the cache usage
+ ScanTable intra_scantable;
+ ScanTable intra_h_scantable;
+ ScanTable intra_v_scantable;
+
+ /* WARNING: changes above this line require updates to hardcoded
+ * offsets used in ASM. */
+
+ struct AVCodecContext *avctx;
+ /* the following parameters must be initialized before encoding */
+ int width, height;///< picture size. must be a multiple of 16
+ int gop_size;
+ int intra_only; ///< if true, only intra pictures are generated
+ int64_t bit_rate; ///< wanted bit rate
+ enum OutputFormat out_format; ///< output format
+ int h263_pred; ///< use MPEG-4/H.263 ac/dc predictions
+ int pb_frame; ///< PB-frame mode (0 = none, 1 = base, 2 = improved)
+
+/* the following codec id fields are deprecated in favor of codec_id */
+ int h263_plus; ///< H.263+ headers
+ int h263_flv; ///< use flv H.263 header
+
+ enum AVCodecID codec_id; /* see AV_CODEC_ID_xxx */
+ int fixed_qscale; ///< fixed qscale if non zero
+ int encoding; ///< true if we are encoding (vs decoding)
+ int max_b_frames; ///< max number of B-frames for encoding
+ int luma_elim_threshold;
+ int chroma_elim_threshold;
+ int strict_std_compliance; ///< strictly follow the std (MPEG-4, ...)
+ int workaround_bugs; ///< workaround bugs in encoders which cannot be detected automatically
+ int codec_tag; ///< internal codec_tag upper case converted from avctx codec_tag
+ /* the following fields are managed internally by the encoder */
+
+ /* sequence parameters */
+ int context_initialized;
+ int input_picture_number; ///< used to set pic->display_picture_number, should not be used for/by anything else
+ int coded_picture_number; ///< used to set pic->coded_picture_number, should not be used for/by anything else
+ int picture_number; //FIXME remove, unclear definition
+ int picture_in_gop_number; ///< 0-> first pic in gop, ...
+ int mb_width, mb_height; ///< number of MBs horizontally & vertically
+ int mb_stride; ///< mb_width+1 used for some arrays to allow simple addressing of left & top MBs without sig11
+ int b8_stride; ///< 2*mb_width+1 used for some 8x8 block arrays to allow simple addressing
+ int h_edge_pos, v_edge_pos;///< horizontal / vertical position of the right/bottom edge (pixel replication)
+ int mb_num; ///< number of MBs of a picture
+ ptrdiff_t linesize; ///< line size, in bytes, may be different from width
+ ptrdiff_t uvlinesize; ///< line size, for chroma in bytes, may be different from width
+ Picture *picture; ///< main picture buffer
+ Picture **input_picture; ///< next pictures on display order for encoding
+ Picture **reordered_input_picture; ///< pointer to the next pictures in coded order for encoding
+
+ int64_t user_specified_pts; ///< last non-zero pts from AVFrame which was passed into avcodec_encode_video2()
+ /**
+ * pts difference between the first and second input frame, used for
+ * calculating dts of the first frame when there's a delay */
+ int64_t dts_delta;
+ /**
+ * reordered pts to be used as dts for the next output frame when there's
+ * a delay */
+ int64_t reordered_pts;
+
+ /** bit output */
+ PutBitContext pb;
+
+ int start_mb_y; ///< start mb_y of this thread (so current thread should process start_mb_y <= row < end_mb_y)
+ int end_mb_y; ///< end mb_y of this thread (so current thread should process start_mb_y <= row < end_mb_y)
+ struct MpegEncContext *thread_context[MAX_THREADS];
+ int slice_context_count; ///< number of used thread_contexts
+
+ /**
+ * copy of the previous picture structure.
+ * note, linesize & data, might not match the previous picture (for field pictures)
+ */
+ Picture last_picture;
+
+ /**
+ * copy of the next picture structure.
+ * note, linesize & data, might not match the next picture (for field pictures)
+ */
+ Picture next_picture;
+
+ /**
+ * copy of the source picture structure for encoding.
+ * note, linesize & data, might not match the source picture (for field pictures)
+ */
+ Picture new_picture;
+
+ /**
+ * copy of the current picture structure.
+ * note, linesize & data, might not match the current picture (for field pictures)
+ */
+ Picture current_picture; ///< buffer to store the decompressed current picture
+
+ Picture *last_picture_ptr; ///< pointer to the previous picture.
+ Picture *next_picture_ptr; ///< pointer to the next picture (for bidir pred)
+ Picture *current_picture_ptr; ///< pointer to the current picture
+ int last_dc[3]; ///< last DC values for MPEG-1
+ int16_t *dc_val_base;
+ int16_t *dc_val[3]; ///< used for MPEG-4 DC prediction, all 3 arrays must be continuous
+ const uint8_t *y_dc_scale_table; ///< qscale -> y_dc_scale table
+ const uint8_t *c_dc_scale_table; ///< qscale -> c_dc_scale table
+ const uint8_t *chroma_qscale_table; ///< qscale -> chroma_qscale (H.263)
+ uint8_t *coded_block_base;
+ uint8_t *coded_block; ///< used for coded block pattern prediction (msmpeg4v3, wmv1)
+ int16_t (*ac_val_base)[16];
+ int16_t (*ac_val[3])[16]; ///< used for MPEG-4 AC prediction, all 3 arrays must be continuous
+ int mb_skipped; ///< MUST BE SET only during DECODING
+ uint8_t *mbskip_table; /**< used to avoid copy if macroblock skipped (for black regions for example)
+ and used for B-frame encoding & decoding (contains skip table of next P-frame) */
+ uint8_t *mbintra_table; ///< used to avoid setting {ac, dc, cbp}-pred stuff to zero on inter MB decoding
+ uint8_t *cbp_table; ///< used to store cbp, ac_pred for partitioned decoding
+ uint8_t *pred_dir_table; ///< used to store pred_dir for partitioned decoding
+
+ ScratchpadContext sc;
+
+ int qscale; ///< QP
+ int chroma_qscale; ///< chroma QP
+ unsigned int lambda; ///< Lagrange multiplier used in rate distortion
+ unsigned int lambda2; ///< (lambda*lambda) >> FF_LAMBDA_SHIFT
+ int *lambda_table;
+ int adaptive_quant; ///< use adaptive quantization
+ int dquant; ///< qscale difference to prev qscale
+ int closed_gop; ///< MPEG1/2 GOP is closed
+ int pict_type; ///< AV_PICTURE_TYPE_I, AV_PICTURE_TYPE_P, AV_PICTURE_TYPE_B, ...
+ int vbv_delay;
+ int last_pict_type; //FIXME remove
+ int last_non_b_pict_type; ///< used for MPEG-4 gmc B-frames & ratecontrol
+ int droppable;
+ int frame_rate_index;
+ AVRational mpeg2_frame_rate_ext;
+ int last_lambda_for[5]; ///< last lambda for a specific pict type
+ int skipdct; ///< skip dct and code zero residual
+
+ /* motion compensation */
+ int unrestricted_mv; ///< mv can point outside of the coded picture
+ int h263_long_vectors; ///< use horrible H.263v1 long vector mode
+
+ BlockDSPContext bdsp;
+ FDCTDSPContext fdsp;
+ H264ChromaContext h264chroma;
+ HpelDSPContext hdsp;
+ IDCTDSPContext idsp;
+ MECmpContext mecc;
+ MpegVideoDSPContext mdsp;
+ MpegvideoEncDSPContext mpvencdsp;
+ PixblockDSPContext pdsp;
+ QpelDSPContext qdsp;
+ VideoDSPContext vdsp;
+ H263DSPContext h263dsp;
+ int f_code; ///< forward MV resolution
+ int b_code; ///< backward MV resolution for B-frames (MPEG-4)
+ int16_t (*p_mv_table_base)[2];
+ int16_t (*b_forw_mv_table_base)[2];
+ int16_t (*b_back_mv_table_base)[2];
+ int16_t (*b_bidir_forw_mv_table_base)[2];
+ int16_t (*b_bidir_back_mv_table_base)[2];
+ int16_t (*b_direct_mv_table_base)[2];
+ int16_t (*p_field_mv_table_base[2][2])[2];
+ int16_t (*b_field_mv_table_base[2][2][2])[2];
+ int16_t (*p_mv_table)[2]; ///< MV table (1MV per MB) P-frame encoding
+ int16_t (*b_forw_mv_table)[2]; ///< MV table (1MV per MB) forward mode B-frame encoding
+ int16_t (*b_back_mv_table)[2]; ///< MV table (1MV per MB) backward mode B-frame encoding
+ int16_t (*b_bidir_forw_mv_table)[2]; ///< MV table (1MV per MB) bidir mode B-frame encoding
+ int16_t (*b_bidir_back_mv_table)[2]; ///< MV table (1MV per MB) bidir mode B-frame encoding
+ int16_t (*b_direct_mv_table)[2]; ///< MV table (1MV per MB) direct mode B-frame encoding
+ int16_t (*p_field_mv_table[2][2])[2]; ///< MV table (2MV per MB) interlaced P-frame encoding
+ int16_t (*b_field_mv_table[2][2][2])[2];///< MV table (4MV per MB) interlaced B-frame encoding
+ uint8_t (*p_field_select_table[2]);
+ uint8_t (*b_field_select_table[2][2]);
+#if FF_API_MOTION_EST
+ int me_method; ///< ME algorithm
+#endif
+ int motion_est; ///< ME algorithm
+ int me_penalty_compensation;
+ int me_pre; ///< prepass for motion estimation
+ int mv_dir;
+#define MV_DIR_FORWARD 1
+#define MV_DIR_BACKWARD 2
+#define MV_DIRECT 4 ///< bidirectional mode where the difference equals the MV of the last P/S/I-Frame (MPEG-4)
+ int mv_type;
+#define MV_TYPE_16X16 0 ///< 1 vector for the whole mb
+#define MV_TYPE_8X8 1 ///< 4 vectors (H.263, MPEG-4 4MV)
+#define MV_TYPE_16X8 2 ///< 2 vectors, one per 16x8 block
+#define MV_TYPE_FIELD 3 ///< 2 vectors, one per field
+#define MV_TYPE_DMV 4 ///< 2 vectors, special mpeg2 Dual Prime Vectors
+ /**motion vectors for a macroblock
+ first coordinate : 0 = forward 1 = backward
+ second " : depend on type
+ third " : 0 = x, 1 = y
+ */
+ int mv[2][4][2];
+ int field_select[2][2];
+ int last_mv[2][2][2]; ///< last MV, used for MV prediction in MPEG-1 & B-frame MPEG-4
+ uint8_t *fcode_tab; ///< smallest fcode needed for each MV
+ int16_t direct_scale_mv[2][64]; ///< precomputed to avoid divisions in ff_mpeg4_set_direct_mv
+
+ MotionEstContext me;
+
+ int no_rounding; /**< apply no rounding to motion compensation (MPEG-4, msmpeg4, ...)
+ for B-frames rounding mode is always 0 */
+
+ /* macroblock layer */
+ int mb_x, mb_y;
+ int mb_skip_run;
+ int mb_intra;
+ uint16_t *mb_type; ///< Table for candidate MB types for encoding (defines in mpegutils.h)
+
+ int block_index[6]; ///< index to current MB in block based arrays with edges
+ int block_wrap[6];
+ uint8_t *dest[3];
+
+ int *mb_index2xy; ///< mb_index -> mb_x + mb_y*mb_stride
+
+ /** matrix transmitted in the bitstream */
+ uint16_t intra_matrix[64];
+ uint16_t chroma_intra_matrix[64];
+ uint16_t inter_matrix[64];
+ uint16_t chroma_inter_matrix[64];
+ int force_duplicated_matrix; ///< Force duplication of mjpeg matrices, useful for rtp streaming
+
+ int intra_quant_bias; ///< bias for the quantizer
+ int inter_quant_bias; ///< bias for the quantizer
+ int min_qcoeff; ///< minimum encodable coefficient
+ int max_qcoeff; ///< maximum encodable coefficient
+ int ac_esc_length; ///< num of bits needed to encode the longest esc
+ uint8_t *intra_ac_vlc_length;
+ uint8_t *intra_ac_vlc_last_length;
+ uint8_t *intra_chroma_ac_vlc_length;
+ uint8_t *intra_chroma_ac_vlc_last_length;
+ uint8_t *inter_ac_vlc_length;
+ uint8_t *inter_ac_vlc_last_length;
+ uint8_t *luma_dc_vlc_length;
+#define UNI_AC_ENC_INDEX(run,level) ((run)*128 + (level))
+
+ int coded_score[12];
+
+ /** precomputed matrix (combine qscale and DCT renorm) */
+ int (*q_intra_matrix)[64];
+ int (*q_chroma_intra_matrix)[64];
+ int (*q_inter_matrix)[64];
+ /** identical to the above but for MMX & these are not permutated, second 64 entries are bias*/
+ uint16_t (*q_intra_matrix16)[2][64];
+ uint16_t (*q_chroma_intra_matrix16)[2][64];
+ uint16_t (*q_inter_matrix16)[2][64];
+
+ /* noise reduction */
+ int (*dct_error_sum)[64];
+ int dct_count[2];
+ uint16_t (*dct_offset)[64];
+
+ /* bit rate control */
+ int64_t total_bits;
+ int frame_bits; ///< bits used for the current frame
+ int stuffing_bits; ///< bits used for stuffing
+ int next_lambda; ///< next lambda used for retrying to encode a frame
+ RateControlContext rc_context; ///< contains stuff only accessed in ratecontrol.c
+
+ /* statistics, used for 2-pass encoding */
+ int mv_bits;
+ int header_bits;
+ int i_tex_bits;
+ int p_tex_bits;
+ int i_count;
+ int f_count;
+ int b_count;
+ int skip_count;
+ int misc_bits; ///< cbp, mb_type
+ int last_bits; ///< temp var used for calculating the above vars
+
+ /* error concealment / resync */
+ int resync_mb_x; ///< x position of last resync marker
+ int resync_mb_y; ///< y position of last resync marker
+ GetBitContext last_resync_gb; ///< used to search for the next resync marker
+ int mb_num_left; ///< number of MBs left in this video packet (for partitioned Slices only)
+ int next_p_frame_damaged; ///< set if the next p frame is damaged, to avoid showing trashed B-frames
+
+ ParseContext parse_context;
+
+ /* H.263 specific */
+ int gob_index;
+ int obmc; ///< overlapped block motion compensation
+ int mb_info; ///< interval for outputting info about mb offsets as side data
+ int prev_mb_info, last_mb_info;
+ uint8_t *mb_info_ptr;
+ int mb_info_size;
+ int ehc_mode;
+ int rc_strategy;
+
+ /* H.263+ specific */
+ int umvplus; ///< == H.263+ && unrestricted_mv
+ int h263_aic_dir; ///< AIC direction: 0 = left, 1 = top
+ int h263_slice_structured;
+ int alt_inter_vlc; ///< alternative inter vlc
+ int modified_quant;
+ int loop_filter;
+ int custom_pcf;
+
+ /* MPEG-4 specific */
+ ///< number of bits to represent the fractional part of time (encoder only)
+ int time_increment_bits;
+ int last_time_base;
+ int time_base; ///< time in seconds of last I,P,S Frame
+ int64_t time; ///< time of current frame
+ int64_t last_non_b_time;
+ uint16_t pp_time; ///< time distance between the last 2 p,s,i frames
+ uint16_t pb_time; ///< time distance between the last b and p,s,i frame
+ uint16_t pp_field_time;
+ uint16_t pb_field_time; ///< like above, just for interlaced
+ int real_sprite_warping_points;
+ int sprite_offset[2][2]; ///< sprite offset[isChroma][isMVY]
+ int sprite_delta[2][2]; ///< sprite_delta [isY][isMVY]
+ int mcsel;
+ int quant_precision;
+ int quarter_sample; ///< 1->qpel, 0->half pel ME/MC
+ int aspect_ratio_info; //FIXME remove
+ int sprite_warping_accuracy;
+ int data_partitioning; ///< data partitioning flag from header
+ int partitioned_frame; ///< is current frame partitioned
+ int low_delay; ///< no reordering needed / has no B-frames
+ int vo_type;
+ PutBitContext tex_pb; ///< used for data partitioned VOPs
+ PutBitContext pb2; ///< used for data partitioned VOPs
+ int mpeg_quant;
+ int padding_bug_score; ///< used to detect the VERY common padding bug in MPEG-4
+
+ /* divx specific, used to workaround (many) bugs in divx5 */
+ int divx_packed;
+ uint8_t *bitstream_buffer; //Divx 5.01 puts several frames in a single one, this is used to reorder them
+ int bitstream_buffer_size;
+ unsigned int allocated_bitstream_buffer_size;
+
+ /* RV10 specific */
+ int rv10_version; ///< RV10 version: 0 or 3
+ int rv10_first_dc_coded[3];
+
+ /* MJPEG specific */
+ struct MJpegContext *mjpeg_ctx;
+ int esc_pos;
+ int pred;
+
+ /* MSMPEG4 specific */
+ int mv_table_index;
+ int rl_table_index;
+ int rl_chroma_table_index;
+ int dc_table_index;
+ int use_skip_mb_code;
+ int slice_height; ///< in macroblocks
+ int first_slice_line; ///< used in MPEG-4 too to handle resync markers
+ int flipflop_rounding;
+ int msmpeg4_version; ///< 0=not msmpeg4, 1=mp41, 2=mp42, 3=mp43/divx3 4=wmv1/7 5=wmv2/8
+ int per_mb_rl_table;
+ int esc3_level_length;
+ int esc3_run_length;
+ /** [mb_intra][isChroma][level][run][last] */
+ int (*ac_stats)[2][MAX_LEVEL+1][MAX_RUN+1][2];
+ int inter_intra_pred;
+ int mspel;
+
+ /* decompression specific */
+ GetBitContext gb;
+
+ /* MPEG-1 specific */
+ int gop_picture_number; ///< index of the first picture of a GOP based on fake_pic_num & MPEG-1 specific
+ int last_mv_dir; ///< last mv_dir, used for B-frame encoding
+ uint8_t *vbv_delay_ptr; ///< pointer to vbv_delay in the bitstream
+
+ /* MPEG-2-specific - I wished not to have to support this mess. */
+ int progressive_sequence;
+ int mpeg_f_code[2][2];
+
+ // picture structure defines are loaded from mpegutils.h
+ int picture_structure;
+
+ int64_t timecode_frame_start; ///< GOP timecode frame start number, in non drop frame format
+ int intra_dc_precision;
+ int frame_pred_frame_dct;
+ int top_field_first;
+ int concealment_motion_vectors;
+ int q_scale_type;
+ int brd_scale;
+ int intra_vlc_format;
+ int alternate_scan;
+ int seq_disp_ext;
+ int repeat_first_field;
+ int chroma_420_type;
+ int chroma_format;
+#define CHROMA_420 1
+#define CHROMA_422 2
+#define CHROMA_444 3
+ int chroma_x_shift;//depend on pix_format, that depend on chroma_format
+ int chroma_y_shift;
+
+ int progressive_frame;
+ int full_pel[2];
+ int interlaced_dct;
+ int first_field; ///< is 1 for the first field of a field picture 0 otherwise
+ int drop_frame_timecode; ///< timecode is in drop frame format.
+ int scan_offset; ///< reserve space for SVCD scan offset user data.
+
+ /* RTP specific */
+ int rtp_mode;
+ int rtp_payload_size;
+
+ char *tc_opt_str; ///< timecode option string
+ AVTimecode tc; ///< timecode context
+
+ uint8_t *ptr_lastgob;
+ int swap_uv; //vcr2 codec is an MPEG-2 variant with U and V swapped
+ int pack_pblocks; //xvmc needs to keep blocks without gaps.
+ int16_t (*pblocks[12])[64];
+
+ int16_t (*block)[64]; ///< points to one of the following blocks
+ int16_t (*blocks)[12][64]; // for HQ mode we need to keep the best block
+ int (*decode_mb)(struct MpegEncContext *s, int16_t block[6][64]); // used by some codecs to avoid a switch()
+#define SLICE_OK 0
+#define SLICE_ERROR -1
+#define SLICE_END -2 ///<end marker found
+#define SLICE_NOEND -3 ///<no end marker or error found but mb count exceeded
+
+ void (*dct_unquantize_mpeg1_intra)(struct MpegEncContext *s,
+ int16_t *block/*align 16*/, int n, int qscale);
+ void (*dct_unquantize_mpeg1_inter)(struct MpegEncContext *s,
+ int16_t *block/*align 16*/, int n, int qscale);
+ void (*dct_unquantize_mpeg2_intra)(struct MpegEncContext *s,
+ int16_t *block/*align 16*/, int n, int qscale);
+ void (*dct_unquantize_mpeg2_inter)(struct MpegEncContext *s,
+ int16_t *block/*align 16*/, int n, int qscale);
+ void (*dct_unquantize_h263_intra)(struct MpegEncContext *s,
+ int16_t *block/*align 16*/, int n, int qscale);
+ void (*dct_unquantize_h263_inter)(struct MpegEncContext *s,
+ int16_t *block/*align 16*/, int n, int qscale);
+ void (*dct_unquantize_intra)(struct MpegEncContext *s, // unquantizer to use (MPEG-4 can use both)
+ int16_t *block/*align 16*/, int n, int qscale);
+ void (*dct_unquantize_inter)(struct MpegEncContext *s, // unquantizer to use (MPEG-4 can use both)
+ int16_t *block/*align 16*/, int n, int qscale);
+ int (*dct_quantize)(struct MpegEncContext *s, int16_t *block/*align 16*/, int n, int qscale, int *overflow);
+ int (*fast_dct_quantize)(struct MpegEncContext *s, int16_t *block/*align 16*/, int n, int qscale, int *overflow);
+ void (*denoise_dct)(struct MpegEncContext *s, int16_t *block);
+
+ int mpv_flags; ///< flags set by private options
+ int quantizer_noise_shaping;
+
+ /**
+ * ratecontrol qmin qmax limiting method
+ * 0-> clipping, 1-> use a nice continuous function to limit qscale within qmin/qmax.
+ */
+ float rc_qsquish;
+ float rc_qmod_amp;
+ int rc_qmod_freq;
+ float rc_initial_cplx;
+ float rc_buffer_aggressivity;
+ float border_masking;
+ int lmin, lmax;
+ int vbv_ignore_qmax;
+
+ char *rc_eq;
+
+ /* temp buffers for rate control */
+ float *cplx_tab, *bits_tab;
+
+ /* flag to indicate a reinitialization is required, e.g. after
+ * a frame size change */
+ int context_reinit;
+
+ ERContext er;
+
+ int error_rate;
+
+ /* temporary frames used by b_frame_strategy = 2 */
+ AVFrame *tmp_frames[MAX_B_FRAMES + 2];
+ int b_frame_strategy;
+ int b_sensitivity;
+
+ /* frame skip options for encoding */
+ int frame_skip_threshold;
+ int frame_skip_factor;
+ int frame_skip_exp;
+ int frame_skip_cmp;
+
+ int scenechange_threshold;
+ int noise_reduction;
+} MpegEncContext;
+
+/* mpegvideo_enc common options */
+#define FF_MPV_FLAG_SKIP_RD 0x0001
+#define FF_MPV_FLAG_STRICT_GOP 0x0002
+#define FF_MPV_FLAG_QP_RD 0x0004
+#define FF_MPV_FLAG_CBP_RD 0x0008
+#define FF_MPV_FLAG_NAQ 0x0010
+#define FF_MPV_FLAG_MV0 0x0020
+
+enum rc_strategy {
+ MPV_RC_STRATEGY_FFMPEG,
+ MPV_RC_STRATEGY_XVID,
+ NB_MPV_RC_STRATEGY
+};
+
+#define FF_MPV_OPT_CMP_FUNC \
+{ "sad", "Sum of absolute differences, fast", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_SAD }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
+{ "sse", "Sum of squared errors", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_SSE }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
+{ "satd", "Sum of absolute Hadamard transformed differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_SATD }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
+{ "dct", "Sum of absolute DCT transformed differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_DCT }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
+{ "psnr", "Sum of squared quantization errors, low quality", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_PSNR }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
+{ "bit", "Number of bits needed for the block", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_BIT }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
+{ "rd", "Rate distortion optimal, slow", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_RD }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
+{ "zero", "Zero", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_ZERO }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
+{ "vsad", "Sum of absolute vertical differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_VSAD }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
+{ "vsse", "Sum of squared vertical differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_VSSE }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
+{ "nsse", "Noise preserving sum of squared differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_NSSE }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
+{ "dct264", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_DCT264 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
+{ "dctmax", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_DCTMAX }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
+{ "chroma", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_CHROMA }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }
+
+#ifndef FF_MPV_OFFSET
+#define FF_MPV_OFFSET(x) offsetof(MpegEncContext, x)
+#endif
+#define FF_MPV_OPT_FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
+#define FF_MPV_COMMON_OPTS \
+FF_MPV_OPT_CMP_FUNC, \
+{ "mpv_flags", "Flags common for all mpegvideo-based encoders.", FF_MPV_OFFSET(mpv_flags), AV_OPT_TYPE_FLAGS, { .i64 = 0 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "mpv_flags" },\
+{ "skip_rd", "RD optimal MB level residual skipping", 0, AV_OPT_TYPE_CONST, { .i64 = FF_MPV_FLAG_SKIP_RD }, 0, 0, FF_MPV_OPT_FLAGS, "mpv_flags" },\
+{ "strict_gop", "Strictly enforce gop size", 0, AV_OPT_TYPE_CONST, { .i64 = FF_MPV_FLAG_STRICT_GOP }, 0, 0, FF_MPV_OPT_FLAGS, "mpv_flags" },\
+{ "qp_rd", "Use rate distortion optimization for qp selection", 0, AV_OPT_TYPE_CONST, { .i64 = FF_MPV_FLAG_QP_RD }, 0, 0, FF_MPV_OPT_FLAGS, "mpv_flags" },\
+{ "cbp_rd", "use rate distortion optimization for CBP", 0, AV_OPT_TYPE_CONST, { .i64 = FF_MPV_FLAG_CBP_RD }, 0, 0, FF_MPV_OPT_FLAGS, "mpv_flags" },\
+{ "naq", "normalize adaptive quantization", 0, AV_OPT_TYPE_CONST, { .i64 = FF_MPV_FLAG_NAQ }, 0, 0, FF_MPV_OPT_FLAGS, "mpv_flags" },\
+{ "mv0", "always try a mb with mv=<0,0>", 0, AV_OPT_TYPE_CONST, { .i64 = FF_MPV_FLAG_MV0 }, 0, 0, FF_MPV_OPT_FLAGS, "mpv_flags" },\
+{ "luma_elim_threshold", "single coefficient elimination threshold for luminance (negative values also consider dc coefficient)",\
+ FF_MPV_OFFSET(luma_elim_threshold), AV_OPT_TYPE_INT, { .i64 = 0 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS },\
+{ "chroma_elim_threshold", "single coefficient elimination threshold for chrominance (negative values also consider dc coefficient)",\
+ FF_MPV_OFFSET(chroma_elim_threshold), AV_OPT_TYPE_INT, { .i64 = 0 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS },\
+{ "quantizer_noise_shaping", NULL, FF_MPV_OFFSET(quantizer_noise_shaping), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FF_MPV_OPT_FLAGS },\
+{ "error_rate", "Simulate errors in the bitstream to test error concealment.", \
+ FF_MPV_OFFSET(error_rate), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FF_MPV_OPT_FLAGS },\
+{"qsquish", "how to keep quantizer between qmin and qmax (0 = clip, 1 = use differentiable function)", \
+ FF_MPV_OFFSET(rc_qsquish), AV_OPT_TYPE_FLOAT, {.dbl = 0 }, 0, 99, FF_MPV_OPT_FLAGS}, \
+{"rc_qmod_amp", "experimental quantizer modulation", FF_MPV_OFFSET(rc_qmod_amp), AV_OPT_TYPE_FLOAT, {.dbl = 0 }, -FLT_MAX, FLT_MAX, FF_MPV_OPT_FLAGS}, \
+{"rc_qmod_freq", "experimental quantizer modulation", FF_MPV_OFFSET(rc_qmod_freq), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS}, \
+{"rc_eq", "Set rate control equation. When computing the expression, besides the standard functions " \
+ "defined in the section 'Expression Evaluation', the following functions are available: " \
+ "bits2qp(bits), qp2bits(qp). Also the following constants are available: iTex pTex tex mv " \
+ "fCode iCount mcVar var isI isP isB avgQP qComp avgIITex avgPITex avgPPTex avgBPTex avgTex.", \
+ FF_MPV_OFFSET(rc_eq), AV_OPT_TYPE_STRING, .flags = FF_MPV_OPT_FLAGS }, \
+{"rc_init_cplx", "initial complexity for 1-pass encoding", FF_MPV_OFFSET(rc_initial_cplx), AV_OPT_TYPE_FLOAT, {.dbl = 0 }, -FLT_MAX, FLT_MAX, FF_MPV_OPT_FLAGS}, \
+{"rc_buf_aggressivity", "currently useless", FF_MPV_OFFSET(rc_buffer_aggressivity), AV_OPT_TYPE_FLOAT, {.dbl = 1.0 }, -FLT_MAX, FLT_MAX, FF_MPV_OPT_FLAGS}, \
+{"border_mask", "increase the quantizer for macroblocks close to borders", FF_MPV_OFFSET(border_masking), AV_OPT_TYPE_FLOAT, {.dbl = 0 }, -FLT_MAX, FLT_MAX, FF_MPV_OPT_FLAGS}, \
+{"lmin", "minimum Lagrange factor (VBR)", FF_MPV_OFFSET(lmin), AV_OPT_TYPE_INT, {.i64 = 2*FF_QP2LAMBDA }, 0, INT_MAX, FF_MPV_OPT_FLAGS }, \
+{"lmax", "maximum Lagrange factor (VBR)", FF_MPV_OFFSET(lmax), AV_OPT_TYPE_INT, {.i64 = 31*FF_QP2LAMBDA }, 0, INT_MAX, FF_MPV_OPT_FLAGS }, \
+{"ibias", "intra quant bias", FF_MPV_OFFSET(intra_quant_bias), AV_OPT_TYPE_INT, {.i64 = FF_DEFAULT_QUANT_BIAS }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS }, \
+{"pbias", "inter quant bias", FF_MPV_OFFSET(inter_quant_bias), AV_OPT_TYPE_INT, {.i64 = FF_DEFAULT_QUANT_BIAS }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS }, \
+{"rc_strategy", "ratecontrol method", FF_MPV_OFFSET(rc_strategy), AV_OPT_TYPE_INT, {.i64 = MPV_RC_STRATEGY_FFMPEG }, 0, NB_MPV_RC_STRATEGY-1, FF_MPV_OPT_FLAGS, "rc_strategy" }, \
+ { "ffmpeg", "default native rate control", 0, AV_OPT_TYPE_CONST, { .i64 = MPV_RC_STRATEGY_FFMPEG }, 0, 0, FF_MPV_OPT_FLAGS, "rc_strategy" }, \
+ { "xvid", "libxvid (2 pass only)", 0, AV_OPT_TYPE_CONST, { .i64 = MPV_RC_STRATEGY_XVID }, 0, 0, FF_MPV_OPT_FLAGS, "rc_strategy" }, \
+{"motion_est", "motion estimation algorithm", FF_MPV_OFFSET(motion_est), AV_OPT_TYPE_INT, {.i64 = FF_ME_EPZS }, FF_ME_ZERO, FF_ME_XONE, FF_MPV_OPT_FLAGS, "motion_est" }, \
+{ "zero", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_ME_ZERO }, 0, 0, FF_MPV_OPT_FLAGS, "motion_est" }, \
+{ "epzs", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_ME_EPZS }, 0, 0, FF_MPV_OPT_FLAGS, "motion_est" }, \
+{ "xone", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_ME_XONE }, 0, 0, FF_MPV_OPT_FLAGS, "motion_est" }, \
+{ "force_duplicated_matrix", "Always write luma and chroma matrix for mjpeg, useful for rtp streaming.", FF_MPV_OFFSET(force_duplicated_matrix), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, FF_MPV_OPT_FLAGS }, \
+{"b_strategy", "Strategy to choose between I/P/B-frames", FF_MPV_OFFSET(b_frame_strategy), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 2, FF_MPV_OPT_FLAGS }, \
+{"b_sensitivity", "Adjust sensitivity of b_frame_strategy 1", FF_MPV_OFFSET(b_sensitivity), AV_OPT_TYPE_INT, {.i64 = 40 }, 1, INT_MAX, FF_MPV_OPT_FLAGS }, \
+{"brd_scale", "Downscale frames for dynamic B-frame decision", FF_MPV_OFFSET(brd_scale), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 3, FF_MPV_OPT_FLAGS }, \
+{"skip_threshold", "Frame skip threshold", FF_MPV_OFFSET(frame_skip_threshold), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS }, \
+{"skip_factor", "Frame skip factor", FF_MPV_OFFSET(frame_skip_factor), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS }, \
+{"skip_exp", "Frame skip exponent", FF_MPV_OFFSET(frame_skip_exp), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS }, \
+{"skip_cmp", "Frame skip compare function", FF_MPV_OFFSET(frame_skip_cmp), AV_OPT_TYPE_INT, {.i64 = FF_CMP_DCTMAX }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS, "cmp_func" }, \
+{"sc_threshold", "Scene change threshold", FF_MPV_OFFSET(scenechange_threshold), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS }, \
+{"noise_reduction", "Noise reduction", FF_MPV_OFFSET(noise_reduction), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS }, \
+{"mpeg_quant", "Use MPEG quantizers instead of H.263", FF_MPV_OFFSET(mpeg_quant), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 1, FF_MPV_OPT_FLAGS }, \
+{"ps", "RTP payload size in bytes", FF_MPV_OFFSET(rtp_payload_size), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS }, \
+{"mepc", "Motion estimation bitrate penalty compensation (1.0 = 256)", FF_MPV_OFFSET(me_penalty_compensation), AV_OPT_TYPE_INT, {.i64 = 256 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS }, \
+{"mepre", "pre motion estimation", FF_MPV_OFFSET(me_pre), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, FF_MPV_OPT_FLAGS }, \
+
+extern const AVOption ff_mpv_generic_options[];
+
+/**
+ * Set the given MpegEncContext to common defaults (same for encoding
+ * and decoding). The changed fields will not depend upon the prior
+ * state of the MpegEncContext.
+ */
+void ff_mpv_common_defaults(MpegEncContext *s);
+
+void ff_dct_encode_init_x86(MpegEncContext *s);
+
+int ff_mpv_common_init(MpegEncContext *s);
+void ff_mpv_common_init_arm(MpegEncContext *s);
+void ff_mpv_common_init_axp(MpegEncContext *s);
+void ff_mpv_common_init_neon(MpegEncContext *s);
+void ff_mpv_common_init_ppc(MpegEncContext *s);
+void ff_mpv_common_init_x86(MpegEncContext *s);
+void ff_mpv_common_init_mips(MpegEncContext *s);
+
+int ff_mpv_common_frame_size_change(MpegEncContext *s);
+void ff_mpv_common_end(MpegEncContext *s);
+
+void ff_mpv_decode_defaults(MpegEncContext *s);
+void ff_mpv_decode_init(MpegEncContext *s, AVCodecContext *avctx);
+void ff_mpv_decode_mb(MpegEncContext *s, int16_t block[12][64]);
+void ff_mpv_report_decode_progress(MpegEncContext *s);
+
+int ff_mpv_frame_start(MpegEncContext *s, AVCodecContext *avctx);
+void ff_mpv_frame_end(MpegEncContext *s);
+
+int ff_mpv_encode_init(AVCodecContext *avctx);
+void ff_mpv_encode_init_x86(MpegEncContext *s);
+
+int ff_mpv_encode_end(AVCodecContext *avctx);
+int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
+ const AVFrame *frame, int *got_packet);
+int ff_mpv_reallocate_putbitbuffer(MpegEncContext *s, size_t threshold, size_t size_increase);
+
+void ff_clean_intra_table_entries(MpegEncContext *s);
+void ff_mpeg_draw_horiz_band(MpegEncContext *s, int y, int h);
+void ff_mpeg_flush(AVCodecContext *avctx);
+
+void ff_print_debug_info(MpegEncContext *s, Picture *p, AVFrame *pict);
+void ff_print_debug_info2(AVCodecContext *avctx, AVFrame *pict, uint8_t *mbskip_table,
+ uint32_t *mbtype_table, int8_t *qscale_table, int16_t (*motion_val[2])[2],
+ int *low_delay,
+ int mb_width, int mb_height, int mb_stride, int quarter_sample);
+
+int ff_mpv_export_qp_table(MpegEncContext *s, AVFrame *f, Picture *p, int qp_type);
+
+void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix);
+
+int ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src);
+int ff_mpeg_update_thread_context(AVCodecContext *dst, const AVCodecContext *src);
+void ff_set_qscale(MpegEncContext * s, int qscale);
+
+void ff_mpv_idct_init(MpegEncContext *s);
+int ff_dct_encode_init(MpegEncContext *s);
+void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16)[2][64],
+ const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra);
+int ff_dct_quantize_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
+void ff_block_permute(int16_t *block, uint8_t *permutation,
+ const uint8_t *scantable, int last);
+void ff_init_block_index(MpegEncContext *s);
+
+void ff_mpv_motion(MpegEncContext *s,
+ uint8_t *dest_y, uint8_t *dest_cb,
+ uint8_t *dest_cr, int dir,
+ uint8_t **ref_picture,
+ op_pixels_func (*pix_op)[4],
+ qpel_mc_func (*qpix_op)[16]);
+
+static inline void ff_update_block_index(MpegEncContext *s){
+ const int block_size= 8 >> s->avctx->lowres;
+
+ s->block_index[0]+=2;
+ s->block_index[1]+=2;
+ s->block_index[2]+=2;
+ s->block_index[3]+=2;
+ s->block_index[4]++;
+ s->block_index[5]++;
+ s->dest[0]+= 2*block_size;
+ s->dest[1]+= block_size;
+ s->dest[2]+= block_size;
+}
+
+static inline int get_bits_diff(MpegEncContext *s){
+ const int bits= put_bits_count(&s->pb);
+ const int last= s->last_bits;
+
+ s->last_bits = bits;
+
+ return bits - last;
+}
+
+#endif /* AVCODEC_MPEGVIDEO_H */
diff --git a/media/ffvpx/libavcodec/mpegvideodsp.h b/media/ffvpx/libavcodec/mpegvideodsp.h
new file mode 100644
index 000000000..293e2548d
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpegvideodsp.h
@@ -0,0 +1,47 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MPEGVIDEODSP_H
+#define AVCODEC_MPEGVIDEODSP_H
+
+#include <stdint.h>
+
+void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
+ int dxx, int dxy, int dyx, int dyy, int shift, int r,
+ int width, int height);
+
+typedef struct MpegVideoDSPContext {
+ /**
+ * translational global motion compensation.
+ */
+ void (*gmc1)(uint8_t *dst /* align 8 */, uint8_t *src /* align 1 */,
+ int srcStride, int h, int x16, int y16, int rounder);
+ /**
+ * global motion compensation.
+ */
+ void (*gmc)(uint8_t *dst /* align 8 */, uint8_t *src /* align 1 */,
+ int stride, int h, int ox, int oy,
+ int dxx, int dxy, int dyx, int dyy,
+ int shift, int r, int width, int height);
+} MpegVideoDSPContext;
+
+void ff_mpegvideodsp_init(MpegVideoDSPContext *c);
+void ff_mpegvideodsp_init_ppc(MpegVideoDSPContext *c);
+void ff_mpegvideodsp_init_x86(MpegVideoDSPContext *c);
+
+#endif /* AVCODEC_MPEGVIDEODSP_H */
diff --git a/media/ffvpx/libavcodec/mpegvideoencdsp.h b/media/ffvpx/libavcodec/mpegvideoencdsp.h
new file mode 100644
index 000000000..33f0282fc
--- /dev/null
+++ b/media/ffvpx/libavcodec/mpegvideoencdsp.h
@@ -0,0 +1,58 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MPEGVIDEOENCDSP_H
+#define AVCODEC_MPEGVIDEOENCDSP_H
+
+#include <stdint.h>
+
+#include "avcodec.h"
+
+#define BASIS_SHIFT 16
+#define RECON_SHIFT 6
+
+#define EDGE_TOP 1
+#define EDGE_BOTTOM 2
+
+typedef struct MpegvideoEncDSPContext {
+ int (*try_8x8basis)(int16_t rem[64], int16_t weight[64],
+ int16_t basis[64], int scale);
+ void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale);
+
+ int (*pix_sum)(uint8_t *pix, int line_size);
+ int (*pix_norm1)(uint8_t *pix, int line_size);
+
+ void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src,
+ int src_wrap, int width, int height);
+
+ void (*draw_edges)(uint8_t *buf, int wrap, int width, int height,
+ int w, int h, int sides);
+} MpegvideoEncDSPContext;
+
+void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c,
+ AVCodecContext *avctx);
+void ff_mpegvideoencdsp_init_arm(MpegvideoEncDSPContext *c,
+ AVCodecContext *avctx);
+void ff_mpegvideoencdsp_init_ppc(MpegvideoEncDSPContext *c,
+ AVCodecContext *avctx);
+void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
+ AVCodecContext *avctx);
+void ff_mpegvideoencdsp_init_mips(MpegvideoEncDSPContext *c,
+ AVCodecContext *avctx);
+
+#endif /* AVCODEC_MPEGVIDEOENCDSP_H */
diff --git a/media/ffvpx/libavcodec/options.c b/media/ffvpx/libavcodec/options.c
new file mode 100644
index 000000000..f25df2ab3
--- /dev/null
+++ b/media/ffvpx/libavcodec/options.c
@@ -0,0 +1,503 @@
+/*
+ * Copyright (c) 2001 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Options definition for AVCodecContext.
+ */
+
+#include "avcodec.h"
+#include "internal.h"
+#include "libavutil/avassert.h"
+#include "libavutil/internal.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include <float.h> /* FLT_MIN, FLT_MAX */
+#include <string.h>
+
+FF_DISABLE_DEPRECATION_WARNINGS
+#include "ff_options_table.h"
+FF_ENABLE_DEPRECATION_WARNINGS
+
+static const char* context_to_name(void* ptr) {
+ AVCodecContext *avc= ptr;
+
+ if(avc && avc->codec && avc->codec->name)
+ return avc->codec->name;
+ else
+ return "NULL";
+}
+
+static void *codec_child_next(void *obj, void *prev)
+{
+ AVCodecContext *s = obj;
+ if (!prev && s->codec && s->codec->priv_class && s->priv_data)
+ return s->priv_data;
+ return NULL;
+}
+
+static const AVClass *codec_child_class_next(const AVClass *prev)
+{
+ AVCodec *c = NULL;
+
+ /* find the codec that corresponds to prev */
+ while (prev && (c = av_codec_next(c)))
+ if (c->priv_class == prev)
+ break;
+
+ /* find next codec with priv options */
+ while (c = av_codec_next(c))
+ if (c->priv_class)
+ return c->priv_class;
+ return NULL;
+}
+
+static AVClassCategory get_category(void *ptr)
+{
+ AVCodecContext* avctx = ptr;
+ if(avctx->codec && avctx->codec->decode) return AV_CLASS_CATEGORY_DECODER;
+ else return AV_CLASS_CATEGORY_ENCODER;
+}
+
+static const AVClass av_codec_context_class = {
+ .class_name = "AVCodecContext",
+ .item_name = context_to_name,
+ .option = avcodec_options,
+ .version = LIBAVUTIL_VERSION_INT,
+ .log_level_offset_offset = offsetof(AVCodecContext, log_level_offset),
+ .child_next = codec_child_next,
+ .child_class_next = codec_child_class_next,
+ .category = AV_CLASS_CATEGORY_ENCODER,
+ .get_category = get_category,
+};
+
+static int init_context_defaults(AVCodecContext *s, const AVCodec *codec)
+{
+ int flags=0;
+ memset(s, 0, sizeof(AVCodecContext));
+
+ s->av_class = &av_codec_context_class;
+
+ s->codec_type = codec ? codec->type : AVMEDIA_TYPE_UNKNOWN;
+ if (codec) {
+ s->codec = codec;
+ s->codec_id = codec->id;
+ }
+
+ if(s->codec_type == AVMEDIA_TYPE_AUDIO)
+ flags= AV_OPT_FLAG_AUDIO_PARAM;
+ else if(s->codec_type == AVMEDIA_TYPE_VIDEO)
+ flags= AV_OPT_FLAG_VIDEO_PARAM;
+ else if(s->codec_type == AVMEDIA_TYPE_SUBTITLE)
+ flags= AV_OPT_FLAG_SUBTITLE_PARAM;
+ av_opt_set_defaults2(s, flags, flags);
+
+ s->time_base = (AVRational){0,1};
+ s->framerate = (AVRational){ 0, 1 };
+ s->pkt_timebase = (AVRational){ 0, 1 };
+ s->get_buffer2 = avcodec_default_get_buffer2;
+ s->get_format = avcodec_default_get_format;
+ s->execute = avcodec_default_execute;
+ s->execute2 = avcodec_default_execute2;
+ s->sample_aspect_ratio = (AVRational){0,1};
+ s->pix_fmt = AV_PIX_FMT_NONE;
+ s->sample_fmt = AV_SAMPLE_FMT_NONE;
+
+ s->reordered_opaque = AV_NOPTS_VALUE;
+ if(codec && codec->priv_data_size){
+ if(!s->priv_data){
+ s->priv_data= av_mallocz(codec->priv_data_size);
+ if (!s->priv_data) {
+ return AVERROR(ENOMEM);
+ }
+ }
+ if(codec->priv_class){
+ *(const AVClass**)s->priv_data = codec->priv_class;
+ av_opt_set_defaults(s->priv_data);
+ }
+ }
+ if (codec && codec->defaults) {
+ int ret;
+ const AVCodecDefault *d = codec->defaults;
+ while (d->key) {
+ ret = av_opt_set(s, d->key, d->value, 0);
+ av_assert0(ret >= 0);
+ d++;
+ }
+ }
+ return 0;
+}
+
+#if FF_API_GET_CONTEXT_DEFAULTS
+int avcodec_get_context_defaults3(AVCodecContext *s, const AVCodec *codec)
+{
+ return init_context_defaults(s, codec);
+}
+#endif
+
+AVCodecContext *avcodec_alloc_context3(const AVCodec *codec)
+{
+ AVCodecContext *avctx= av_malloc(sizeof(AVCodecContext));
+
+ if (!avctx)
+ return NULL;
+
+ if (init_context_defaults(avctx, codec) < 0) {
+ av_free(avctx);
+ return NULL;
+ }
+
+ return avctx;
+}
+
+void avcodec_free_context(AVCodecContext **pavctx)
+{
+ AVCodecContext *avctx = *pavctx;
+
+ if (!avctx)
+ return;
+
+ avcodec_close(avctx);
+
+ av_freep(&avctx->extradata);
+ av_freep(&avctx->subtitle_header);
+ av_freep(&avctx->intra_matrix);
+ av_freep(&avctx->inter_matrix);
+ av_freep(&avctx->rc_override);
+
+ av_freep(pavctx);
+}
+
+#if FF_API_COPY_CONTEXT
+int avcodec_copy_context(AVCodecContext *dest, const AVCodecContext *src)
+{
+ const AVCodec *orig_codec = dest->codec;
+ uint8_t *orig_priv_data = dest->priv_data;
+
+ if (avcodec_is_open(dest)) { // check that the dest context is uninitialized
+ av_log(dest, AV_LOG_ERROR,
+ "Tried to copy AVCodecContext %p into already-initialized %p\n",
+ src, dest);
+ return AVERROR(EINVAL);
+ }
+
+ av_opt_free(dest);
+ av_freep(&dest->rc_override);
+ av_freep(&dest->intra_matrix);
+ av_freep(&dest->inter_matrix);
+ av_freep(&dest->extradata);
+ av_freep(&dest->subtitle_header);
+
+ memcpy(dest, src, sizeof(*dest));
+ av_opt_copy(dest, src);
+
+ dest->priv_data = orig_priv_data;
+ dest->codec = orig_codec;
+
+ if (orig_priv_data && src->codec && src->codec->priv_class &&
+ dest->codec && dest->codec->priv_class)
+ av_opt_copy(orig_priv_data, src->priv_data);
+
+
+ /* set values specific to opened codecs back to their default state */
+ dest->slice_offset = NULL;
+ dest->hwaccel = NULL;
+ dest->internal = NULL;
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
+ dest->coded_frame = NULL;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+ /* reallocate values that should be allocated separately */
+ dest->extradata = NULL;
+ dest->intra_matrix = NULL;
+ dest->inter_matrix = NULL;
+ dest->rc_override = NULL;
+ dest->subtitle_header = NULL;
+ dest->hw_frames_ctx = NULL;
+
+#define alloc_and_copy_or_fail(obj, size, pad) \
+ if (src->obj && size > 0) { \
+ dest->obj = av_malloc(size + pad); \
+ if (!dest->obj) \
+ goto fail; \
+ memcpy(dest->obj, src->obj, size); \
+ if (pad) \
+ memset(((uint8_t *) dest->obj) + size, 0, pad); \
+ }
+ alloc_and_copy_or_fail(extradata, src->extradata_size,
+ AV_INPUT_BUFFER_PADDING_SIZE);
+ dest->extradata_size = src->extradata_size;
+ alloc_and_copy_or_fail(intra_matrix, 64 * sizeof(int16_t), 0);
+ alloc_and_copy_or_fail(inter_matrix, 64 * sizeof(int16_t), 0);
+ alloc_and_copy_or_fail(rc_override, src->rc_override_count * sizeof(*src->rc_override), 0);
+ alloc_and_copy_or_fail(subtitle_header, src->subtitle_header_size, 1);
+ av_assert0(dest->subtitle_header_size == src->subtitle_header_size);
+#undef alloc_and_copy_or_fail
+
+ if (src->hw_frames_ctx) {
+ dest->hw_frames_ctx = av_buffer_ref(src->hw_frames_ctx);
+ if (!dest->hw_frames_ctx)
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ av_freep(&dest->subtitle_header);
+ av_freep(&dest->rc_override);
+ av_freep(&dest->intra_matrix);
+ av_freep(&dest->inter_matrix);
+ av_freep(&dest->extradata);
+ av_buffer_unref(&dest->hw_frames_ctx);
+ dest->subtitle_header_size = 0;
+ dest->extradata_size = 0;
+ av_opt_free(dest);
+ return AVERROR(ENOMEM);
+}
+#endif
+
+const AVClass *avcodec_get_class(void)
+{
+ return &av_codec_context_class;
+}
+
+#define FOFFSET(x) offsetof(AVFrame,x)
+
+static const AVOption frame_options[]={
+{"best_effort_timestamp", "", FOFFSET(best_effort_timestamp), AV_OPT_TYPE_INT64, {.i64 = AV_NOPTS_VALUE }, INT64_MIN, INT64_MAX, 0},
+{"pkt_pos", "", FOFFSET(pkt_pos), AV_OPT_TYPE_INT64, {.i64 = -1 }, INT64_MIN, INT64_MAX, 0},
+{"pkt_size", "", FOFFSET(pkt_size), AV_OPT_TYPE_INT64, {.i64 = -1 }, INT64_MIN, INT64_MAX, 0},
+{"sample_aspect_ratio", "", FOFFSET(sample_aspect_ratio), AV_OPT_TYPE_RATIONAL, {.dbl = 0 }, 0, INT_MAX, 0},
+{"width", "", FOFFSET(width), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, 0},
+{"height", "", FOFFSET(height), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, 0},
+{"format", "", FOFFSET(format), AV_OPT_TYPE_INT, {.i64 = -1 }, 0, INT_MAX, 0},
+{"channel_layout", "", FOFFSET(channel_layout), AV_OPT_TYPE_INT64, {.i64 = 0 }, 0, INT64_MAX, 0},
+{"sample_rate", "", FOFFSET(sample_rate), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, 0},
+{NULL},
+};
+
+static const AVClass av_frame_class = {
+ .class_name = "AVFrame",
+ .item_name = NULL,
+ .option = frame_options,
+ .version = LIBAVUTIL_VERSION_INT,
+};
+
+const AVClass *avcodec_get_frame_class(void)
+{
+ return &av_frame_class;
+}
+
+#define SROFFSET(x) offsetof(AVSubtitleRect,x)
+
+static const AVOption subtitle_rect_options[]={
+{"x", "", SROFFSET(x), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, 0},
+{"y", "", SROFFSET(y), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, 0},
+{"w", "", SROFFSET(w), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, 0},
+{"h", "", SROFFSET(h), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, 0},
+{"type", "", SROFFSET(type), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, 0},
+{"flags", "", SROFFSET(flags), AV_OPT_TYPE_FLAGS, {.i64 = 0}, 0, 1, 0, "flags"},
+{"forced", "", SROFFSET(flags), AV_OPT_TYPE_FLAGS, {.i64 = 0}, 0, 1, 0},
+{NULL},
+};
+
+static const AVClass av_subtitle_rect_class = {
+ .class_name = "AVSubtitleRect",
+ .item_name = NULL,
+ .option = subtitle_rect_options,
+ .version = LIBAVUTIL_VERSION_INT,
+};
+
+const AVClass *avcodec_get_subtitle_rect_class(void)
+{
+ return &av_subtitle_rect_class;
+}
+
+#ifdef TEST
+static int dummy_init(AVCodecContext *ctx)
+{
+ //TODO: this code should set every possible pointer that could be set by codec and is not an option;
+ ctx->extradata_size = 8;
+ ctx->extradata = av_malloc(ctx->extradata_size);
+ return 0;
+}
+
+static int dummy_close(AVCodecContext *ctx)
+{
+ av_freep(&ctx->extradata);
+ ctx->extradata_size = 0;
+ return 0;
+}
+
+static int dummy_encode(AVCodecContext *ctx, AVPacket *pkt, const AVFrame *frame, int *got_packet)
+{
+ return AVERROR(ENOSYS);
+}
+
+typedef struct Dummy12Context {
+ AVClass *av_class;
+ int num;
+ char* str;
+} Dummy12Context;
+
+typedef struct Dummy3Context {
+ void *fake_av_class;
+ int num;
+ char* str;
+} Dummy3Context;
+
+#define OFFSET(x) offsetof(Dummy12Context, x)
+#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+static const AVOption dummy_options[] = {
+ { "str", "set str", OFFSET(str), AV_OPT_TYPE_STRING, { .str = "i'm src default value" }, 0, 0, VE},
+ { "num", "set num", OFFSET(num), AV_OPT_TYPE_INT, { .i64 = 1500100900 }, 0, INT_MAX, VE},
+ { NULL },
+};
+
+static const AVClass dummy_v1_class = {
+ .class_name = "dummy_v1_class",
+ .item_name = av_default_item_name,
+ .option = dummy_options,
+ .version = LIBAVUTIL_VERSION_INT,
+};
+
+static const AVClass dummy_v2_class = {
+ .class_name = "dummy_v2_class",
+ .item_name = av_default_item_name,
+ .option = dummy_options,
+ .version = LIBAVUTIL_VERSION_INT,
+};
+
+/* codec with options */
+static AVCodec dummy_v1_encoder = {
+ .name = "dummy_v1_codec",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .id = AV_CODEC_ID_NONE - 1,
+ .encode2 = dummy_encode,
+ .init = dummy_init,
+ .close = dummy_close,
+ .priv_class = &dummy_v1_class,
+ .priv_data_size = sizeof(Dummy12Context),
+};
+
+/* codec with options, different class */
+static AVCodec dummy_v2_encoder = {
+ .name = "dummy_v2_codec",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .id = AV_CODEC_ID_NONE - 2,
+ .encode2 = dummy_encode,
+ .init = dummy_init,
+ .close = dummy_close,
+ .priv_class = &dummy_v2_class,
+ .priv_data_size = sizeof(Dummy12Context),
+};
+
+/* codec with priv data, but no class */
+static AVCodec dummy_v3_encoder = {
+ .name = "dummy_v3_codec",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .id = AV_CODEC_ID_NONE - 3,
+ .encode2 = dummy_encode,
+ .init = dummy_init,
+ .close = dummy_close,
+ .priv_data_size = sizeof(Dummy3Context),
+};
+
+/* codec without priv data */
+static AVCodec dummy_v4_encoder = {
+ .name = "dummy_v4_codec",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .id = AV_CODEC_ID_NONE - 4,
+ .encode2 = dummy_encode,
+ .init = dummy_init,
+ .close = dummy_close,
+};
+
+static void test_copy_print_codec(const AVCodecContext *ctx)
+{
+ printf("%-14s: %dx%d prv: %s",
+ ctx->codec ? ctx->codec->name : "NULL",
+ ctx->width, ctx->height,
+ ctx->priv_data ? "set" : "null");
+ if (ctx->codec && ctx->codec->priv_class && ctx->codec->priv_data_size) {
+ int64_t i64;
+ char *str = NULL;
+ av_opt_get_int(ctx->priv_data, "num", 0, &i64);
+ av_opt_get(ctx->priv_data, "str", 0, (uint8_t**)&str);
+ printf(" opts: %"PRId64" %s", i64, str);
+ av_free(str);
+ }
+ printf("\n");
+}
+
+static void test_copy(const AVCodec *c1, const AVCodec *c2)
+{
+ AVCodecContext *ctx1, *ctx2;
+ printf("%s -> %s\nclosed:\n", c1 ? c1->name : "NULL", c2 ? c2->name : "NULL");
+ ctx1 = avcodec_alloc_context3(c1);
+ ctx2 = avcodec_alloc_context3(c2);
+ ctx1->width = ctx1->height = 128;
+ if (ctx2->codec && ctx2->codec->priv_class && ctx2->codec->priv_data_size) {
+ av_opt_set(ctx2->priv_data, "num", "667", 0);
+ av_opt_set(ctx2->priv_data, "str", "i'm dest value before copy", 0);
+ }
+ avcodec_copy_context(ctx2, ctx1);
+ test_copy_print_codec(ctx1);
+ test_copy_print_codec(ctx2);
+ if (ctx1->codec) {
+ printf("opened:\n");
+ avcodec_open2(ctx1, ctx1->codec, NULL);
+ if (ctx2->codec && ctx2->codec->priv_class && ctx2->codec->priv_data_size) {
+ av_opt_set(ctx2->priv_data, "num", "667", 0);
+ av_opt_set(ctx2->priv_data, "str", "i'm dest value before copy", 0);
+ }
+ avcodec_copy_context(ctx2, ctx1);
+ test_copy_print_codec(ctx1);
+ test_copy_print_codec(ctx2);
+ avcodec_close(ctx1);
+ }
+ avcodec_free_context(&ctx1);
+ avcodec_free_context(&ctx2);
+}
+
+int main(void)
+{
+ AVCodec *dummy_codec[] = {
+ &dummy_v1_encoder,
+ &dummy_v2_encoder,
+ &dummy_v3_encoder,
+ &dummy_v4_encoder,
+ NULL,
+ };
+ int i, j;
+
+ for (i = 0; dummy_codec[i]; i++)
+ avcodec_register(dummy_codec[i]);
+
+ printf("testing avcodec_copy_context()\n");
+ for (i = 0; i < FF_ARRAY_ELEMS(dummy_codec); i++)
+ for (j = 0; j < FF_ARRAY_ELEMS(dummy_codec); j++)
+ test_copy(dummy_codec[i], dummy_codec[j]);
+ return 0;
+}
+#endif
diff --git a/media/ffvpx/libavcodec/parser.c b/media/ffvpx/libavcodec/parser.c
new file mode 100644
index 000000000..2c8fc6904
--- /dev/null
+++ b/media/ffvpx/libavcodec/parser.c
@@ -0,0 +1,340 @@
+/*
+ * Audio and Video frame extraction
+ * Copyright (c) 2003 Fabrice Bellard
+ * Copyright (c) 2003 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+#include <string.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/atomic.h"
+#include "libavutil/internal.h"
+#include "libavutil/mem.h"
+
+#include "internal.h"
+#include "parser.h"
+
+/* Head of the singly linked list of registered parsers (most recent first). */
+static AVCodecParser *av_first_parser = NULL;
+
+/* Iterate the parser registry: p == NULL returns the list head, otherwise
+ * the parser registered before p. Returns NULL at the end of the list. */
+AVCodecParser *av_parser_next(const AVCodecParser *p)
+{
+ if (p)
+ return p->next;
+ else
+ return av_first_parser;
+}
+
+/* Prepend a parser to the global registry. The compare-and-swap retry loop
+ * makes concurrent registration safe without a lock: the store only takes
+ * effect if av_first_parser still equals the head we linked against. */
+void av_register_codec_parser(AVCodecParser *parser)
+{
+ do {
+ parser->next = av_first_parser;
+ } while (parser->next != avpriv_atomic_ptr_cas((void * volatile *)&av_first_parser, parser->next, parser));
+}
+
+/* Allocate and initialize a parser context for codec_id.
+ * Returns NULL if no registered parser advertises the id (each parser lists
+ * up to 5 codec ids) or on allocation / parser_init failure. */
+AVCodecParserContext *av_parser_init(int codec_id)
+{
+ AVCodecParserContext *s = NULL;
+ AVCodecParser *parser;
+ int ret;
+
+ if (codec_id == AV_CODEC_ID_NONE)
+ return NULL;
+
+ for (parser = av_first_parser; parser; parser = parser->next) {
+ if (parser->codec_ids[0] == codec_id ||
+ parser->codec_ids[1] == codec_id ||
+ parser->codec_ids[2] == codec_id ||
+ parser->codec_ids[3] == codec_id ||
+ parser->codec_ids[4] == codec_id)
+ goto found;
+ }
+ return NULL;
+
+found:
+ s = av_mallocz(sizeof(AVCodecParserContext));
+ if (!s)
+ goto err_out;
+ s->parser = parser;
+ /* NOTE(review): if priv_data_size is 0 this relies on av_mallocz(0)
+ * returning non-NULL; otherwise init would be misreported as OOM — confirm. */
+ s->priv_data = av_mallocz(parser->priv_data_size);
+ if (!s->priv_data)
+ goto err_out;
+ s->fetch_timestamp=1;
+ s->pict_type = AV_PICTURE_TYPE_I;
+ if (parser->parser_init) {
+ ret = parser->parser_init(s);
+ if (ret != 0)
+ goto err_out;
+ }
+ s->key_frame = -1;
+#if FF_API_CONVERGENCE_DURATION
+FF_DISABLE_DEPRECATION_WARNINGS
+ s->convergence_duration = 0;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+ /* INT_MIN / -1 act as "not set yet" sentinels for timestamp bookkeeping. */
+ s->dts_sync_point = INT_MIN;
+ s->dts_ref_dts_delta = INT_MIN;
+ s->pts_dts_delta = INT_MIN;
+ s->format = -1;
+
+ return s;
+
+err_out:
+ if (s)
+ av_freep(&s->priv_data);
+ av_free(s);
+ return NULL;
+}
+
+/* Pull pts/dts/pos for the byte at cur_offset+off out of the ring of packet
+ * descriptors filled by av_parser_parse2(). Unless `fuzzy`, the outputs are
+ * first reset so a miss yields AV_NOPTS_VALUE; with `fuzzy`, existing values
+ * are only overwritten by entries that actually carry a DTS. */
+void ff_fetch_timestamp(AVCodecParserContext *s, int off, int remove, int fuzzy)
+{
+ int i;
+
+ if (!fuzzy) {
+ s->dts =
+ s->pts = AV_NOPTS_VALUE;
+ s->pos = -1;
+ s->offset = 0;
+ }
+ for (i = 0; i < AV_PARSER_PTS_NB; i++) {
+ if (s->cur_offset + off >= s->cur_frame_offset[i] &&
+ (s->frame_offset < s->cur_frame_offset[i] ||
+ (!s->frame_offset && !s->next_frame_offset)) && // first field/frame
+ // check disabled since MPEG-TS does not send complete PES packets
+ /*s->next_frame_offset + off <*/ s->cur_frame_end[i]){
+
+ if (!fuzzy || s->cur_frame_dts[i] != AV_NOPTS_VALUE) {
+ s->dts = s->cur_frame_dts[i];
+ s->pts = s->cur_frame_pts[i];
+ s->pos = s->cur_frame_pos[i];
+ s->offset = s->next_frame_offset - s->cur_frame_offset[i];
+ }
+ /* Mark the slot consumed so later frames cannot reuse this timestamp. */
+ if (remove)
+ s->cur_frame_offset[i] = INT64_MAX;
+ if (s->cur_offset + off < s->cur_frame_end[i])
+ break;
+ }
+ }
+}
+
+/* Feed buf/buf_size to the codec parser. On output *poutbuf/*poutbuf_size
+ * describe a complete frame (or are empty if more input is needed). Records
+ * the packet's pts/dts/pos in a small ring so frame timestamps can be
+ * recovered later, and returns the number of input bytes consumed. */
+int av_parser_parse2(AVCodecParserContext *s, AVCodecContext *avctx,
+ uint8_t **poutbuf, int *poutbuf_size,
+ const uint8_t *buf, int buf_size,
+ int64_t pts, int64_t dts, int64_t pos)
+{
+ int index, i;
+ uint8_t dummy_buf[AV_INPUT_BUFFER_PADDING_SIZE];
+
+ av_assert1(avctx->codec_id != AV_CODEC_ID_NONE);
+
+ /* Parsers only work for the specified codec ids. */
+ av_assert1(avctx->codec_id == s->parser->codec_ids[0] ||
+ avctx->codec_id == s->parser->codec_ids[1] ||
+ avctx->codec_id == s->parser->codec_ids[2] ||
+ avctx->codec_id == s->parser->codec_ids[3] ||
+ avctx->codec_id == s->parser->codec_ids[4]);
+
+ /* First call: anchor the byte-offset counters at the caller's position. */
+ if (!(s->flags & PARSER_FLAG_FETCHED_OFFSET)) {
+ s->next_frame_offset =
+ s->cur_offset = pos;
+ s->flags |= PARSER_FLAG_FETCHED_OFFSET;
+ }
+
+ if (buf_size == 0) {
+ /* padding is always necessary even if EOF, so we add it here */
+ memset(dummy_buf, 0, sizeof(dummy_buf));
+ buf = dummy_buf;
+ } else if (s->cur_offset + buf_size != s->cur_frame_end[s->cur_frame_start_index]) { /* skip remainder packets */
+ /* add a new packet descriptor */
+ i = (s->cur_frame_start_index + 1) & (AV_PARSER_PTS_NB - 1);
+ s->cur_frame_start_index = i;
+ s->cur_frame_offset[i] = s->cur_offset;
+ s->cur_frame_end[i] = s->cur_offset + buf_size;
+ s->cur_frame_pts[i] = pts;
+ s->cur_frame_dts[i] = dts;
+ s->cur_frame_pos[i] = pos;
+ }
+
+ if (s->fetch_timestamp) {
+ s->fetch_timestamp = 0;
+ s->last_pts = s->pts;
+ s->last_dts = s->dts;
+ s->last_pos = s->pos;
+ ff_fetch_timestamp(s, 0, 0, 0);
+ }
+ /* WARNING: the returned index can be negative */
+ index = s->parser->parser_parse(s, avctx, (const uint8_t **) poutbuf,
+ poutbuf_size, buf, buf_size);
+ av_assert0(index > -0x20000000); // The API does not allow returning AVERROR codes
+ /* update the file pointer */
+ if (*poutbuf_size) {
+ /* fill the data for the current frame */
+ s->frame_offset = s->next_frame_offset;
+
+ /* offset of the next frame */
+ s->next_frame_offset = s->cur_offset + index;
+ s->fetch_timestamp = 1;
+ }
+ /* Negative index means the parser buffered bytes; report 0 consumed. */
+ if (index < 0)
+ index = 0;
+ s->cur_offset += index;
+ return index;
+}
+
+/* Optionally strip an in-band header via the parser's split() callback, and
+ * prepend extradata to keyframes when LOCAL_HEADER is requested.
+ * Returns 1 when *poutbuf is a fresh allocation the caller must free,
+ * 0 when *poutbuf merely aliases buf, negative AVERROR on failure. */
+int av_parser_change(AVCodecParserContext *s, AVCodecContext *avctx,
+ uint8_t **poutbuf, int *poutbuf_size,
+ const uint8_t *buf, int buf_size, int keyframe)
+{
+ if (s && s->parser->split) {
+ if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER ||
+ avctx->flags2 & AV_CODEC_FLAG2_LOCAL_HEADER) {
+ /* Skip the duplicated header bytes at the start of the packet. */
+ int i = s->parser->split(avctx, buf, buf_size);
+ buf += i;
+ buf_size -= i;
+ }
+ }
+
+ /* cast to avoid warning about discarding qualifiers */
+ *poutbuf = (uint8_t *) buf;
+ *poutbuf_size = buf_size;
+ if (avctx->extradata) {
+ if (keyframe && (avctx->flags2 & AV_CODEC_FLAG2_LOCAL_HEADER)) {
+ int size = buf_size + avctx->extradata_size;
+
+ *poutbuf_size = size;
+ *poutbuf = av_malloc(size + AV_INPUT_BUFFER_PADDING_SIZE);
+ if (!*poutbuf)
+ return AVERROR(ENOMEM);
+
+ memcpy(*poutbuf, avctx->extradata, avctx->extradata_size);
+ /* NOTE(review): this reads buf_size + padding bytes from buf, i.e.
+ * assumes the input buffer is padded per FFmpeg convention — confirm
+ * all callers provide AV_INPUT_BUFFER_PADDING_SIZE padded input. */
+ memcpy(*poutbuf + avctx->extradata_size, buf,
+ buf_size + AV_INPUT_BUFFER_PADDING_SIZE);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/* Free a parser context created by av_parser_init(); runs the parser's own
+ * close callback first. Safe to call with s == NULL. */
+void av_parser_close(AVCodecParserContext *s)
+{
+ if (s) {
+ if (s->parser->parser_close)
+ s->parser->parser_close(s);
+ av_freep(&s->priv_data);
+ av_free(s);
+ }
+}
+
+/* Accumulate input into pc->buffer until a frame boundary is known.
+ * `next` is the boundary offset relative to *buf, or END_NOT_FOUND; it may
+ * be negative when the boundary lies inside previously buffered data.
+ * Returns -1 while the frame is incomplete (input fully buffered), 0 once
+ * *buf/*buf_size describe a complete frame, AVERROR(ENOMEM) on failure. */
+int ff_combine_frame(ParseContext *pc, int next,
+ const uint8_t **buf, int *buf_size)
+{
+ if (pc->overread) {
+ ff_dlog(NULL, "overread %d, state:%X next:%d index:%d o_index:%d\n",
+ pc->overread, pc->state, next, pc->index, pc->overread_index);
+ ff_dlog(NULL, "%X %X %X %X\n",
+ (*buf)[0], (*buf)[1], (*buf)[2], (*buf)[3]);
+ }
+
+ /* Copy overread bytes from last frame into buffer. */
+ for (; pc->overread > 0; pc->overread--)
+ pc->buffer[pc->index++] = pc->buffer[pc->overread_index++];
+
+ /* flush remaining if EOF */
+ if (!*buf_size && next == END_NOT_FOUND)
+ next = 0;
+
+ pc->last_index = pc->index;
+
+ /* copy into buffer end return */
+ if (next == END_NOT_FOUND) {
+ void *new_buffer = av_fast_realloc(pc->buffer, &pc->buffer_size,
+ *buf_size + pc->index +
+ AV_INPUT_BUFFER_PADDING_SIZE);
+
+ if (!new_buffer) {
+ av_log(NULL, AV_LOG_ERROR, "Failed to reallocate parser buffer to %d\n", *buf_size + pc->index + AV_INPUT_BUFFER_PADDING_SIZE);
+ pc->index = 0;
+ return AVERROR(ENOMEM);
+ }
+ pc->buffer = new_buffer;
+ memcpy(&pc->buffer[pc->index], *buf, *buf_size);
+ pc->index += *buf_size;
+ return -1;
+ }
+
+ /* Frame ends pc->index + next bytes in; bytes beyond it are "overread". */
+ *buf_size =
+ pc->overread_index = pc->index + next;
+
+ /* append to buffer */
+ if (pc->index) {
+ void *new_buffer = av_fast_realloc(pc->buffer, &pc->buffer_size,
+ next + pc->index +
+ AV_INPUT_BUFFER_PADDING_SIZE);
+ if (!new_buffer) {
+ av_log(NULL, AV_LOG_ERROR, "Failed to reallocate parser buffer to %d\n", next + pc->index + AV_INPUT_BUFFER_PADDING_SIZE);
+ pc->overread_index =
+ pc->index = 0;
+ return AVERROR(ENOMEM);
+ }
+ pc->buffer = new_buffer;
+ if (next > -AV_INPUT_BUFFER_PADDING_SIZE)
+ memcpy(&pc->buffer[pc->index], *buf,
+ next + AV_INPUT_BUFFER_PADDING_SIZE);
+ pc->index = 0;
+ *buf = pc->buffer;
+ }
+
+ /* store overread bytes */
+ for (; next < 0; next++) {
+ pc->state = pc->state << 8 | pc->buffer[pc->last_index + next];
+ pc->state64 = pc->state64 << 8 | pc->buffer[pc->last_index + next];
+ pc->overread++;
+ }
+
+ if (pc->overread) {
+ ff_dlog(NULL, "overread %d, state:%X next:%d index:%d o_index:%d\n",
+ pc->overread, pc->state, next, pc->index, pc->overread_index);
+ ff_dlog(NULL, "%X %X %X %X\n",
+ (*buf)[0], (*buf)[1], (*buf)[2], (*buf)[3]);
+ }
+
+ return 0;
+}
+
+/* Generic parser_close callback for parsers whose priv_data starts with a
+ * ParseContext: releases the accumulation buffer. */
+void ff_parse_close(AVCodecParserContext *s)
+{
+ ParseContext *pc = s->priv_data;
+
+ av_freep(&pc->buffer);
+}
+
+/* split() callback for MPEG-4 video: return the number of header bytes before
+ * the first GOP (0x1B3) or VOP (0x1B6) start code, or 0 if none is found.
+ * The -4 steps back over the 4-byte start code just matched into `state`. */
+int ff_mpeg4video_split(AVCodecContext *avctx, const uint8_t *buf, int buf_size)
+{
+ uint32_t state = -1;
+ const uint8_t *ptr = buf, *end = buf + buf_size;
+
+ while (ptr < end) {
+ ptr = avpriv_find_start_code(ptr, end, &state);
+ if (state == 0x1B3 || state == 0x1B6)
+ return ptr - 4 - buf;
+ }
+
+ return 0;
+}
diff --git a/media/ffvpx/libavcodec/parser.h b/media/ffvpx/libavcodec/parser.h
new file mode 100644
index 000000000..ef35547e9
--- /dev/null
+++ b/media/ffvpx/libavcodec/parser.h
@@ -0,0 +1,60 @@
+/*
+ * AVCodecParser prototypes and definitions
+ * Copyright (c) 2003 Fabrice Bellard
+ * Copyright (c) 2003 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_PARSER_H
+#define AVCODEC_PARSER_H
+
+#include "avcodec.h"
+
+/**
+ * Shared state for parsers that accumulate partial input into whole frames
+ * via ff_combine_frame(). Expected to be the first member of a parser's
+ * priv_data when ff_parse_close() is used as the close callback.
+ */
+typedef struct ParseContext{
+ uint8_t *buffer; ///< accumulation buffer (av_fast_realloc'd)
+ int index; ///< number of valid bytes in buffer
+ int last_index;
+ unsigned int buffer_size; ///< allocated size of buffer
+ uint32_t state; ///< contains the last few bytes in MSB order
+ int frame_start_found;
+ int overread; ///< the number of bytes which were irreversibly read from the next frame
+ int overread_index; ///< the index into ParseContext.buffer of the overread bytes
+ uint64_t state64; ///< contains the last 8 bytes in MSB order
+} ParseContext;
+
+/// Sentinel for "no frame boundary in the current input" (see ff_combine_frame).
+#define END_NOT_FOUND (-100)
+
+/**
+ * Combine the (truncated) bitstream to a complete frame.
+ * @return -1 if no complete frame could be created,
+ * AVERROR(ENOMEM) if there was a memory allocation error
+ */
+int ff_combine_frame(ParseContext *pc, int next, const uint8_t **buf, int *buf_size);
+/// Return the byte length of in-band headers preceding the first MPEG-4 GOP/VOP.
+int ff_mpeg4video_split(AVCodecContext *avctx, const uint8_t *buf,
+ int buf_size);
+/// Free the ParseContext accumulation buffer (generic parser_close callback).
+void ff_parse_close(AVCodecParserContext *s);
+
+/**
+ * Fetch timestamps for a specific byte within the current access unit.
+ * @param off byte position within the access unit
+ * @param remove Found timestamps will be removed if set to 1, kept if set to 0.
+ * @param fuzzy Only use found value if it is more informative than what we already have
+ */
+void ff_fetch_timestamp(AVCodecParserContext *s, int off, int remove, int fuzzy);
+
+#endif /* AVCODEC_PARSER_H */
diff --git a/media/ffvpx/libavcodec/pixblockdsp.h b/media/ffvpx/libavcodec/pixblockdsp.h
new file mode 100644
index 000000000..79ed86c3a
--- /dev/null
+++ b/media/ffvpx/libavcodec/pixblockdsp.h
@@ -0,0 +1,48 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_PIXBLOCKDSP_H
+#define AVCODEC_PIXBLOCKDSP_H
+
+#include <stdint.h>
+
+#include "avcodec.h"
+
+/**
+ * Function pointers for pixel-block conversion, selectable per-CPU.
+ * get_pixels copies 8-bit pixels into a 16-bit block; diff_pixels stores the
+ * per-pixel difference s1 - s2 (presumably 8x8 blocks — confirm in the
+ * platform implementations).
+ */
+typedef struct PixblockDSPContext {
+ void (*get_pixels)(int16_t *block /* align 16 */,
+ const uint8_t *pixels /* align 8 */,
+ ptrdiff_t line_size);
+ /* NOTE(review): stride is int here but ptrdiff_t in get_pixels —
+ * inconsistent signature widths, kept as-is to match upstream. */
+ void (*diff_pixels)(int16_t *block /* align 16 */,
+ const uint8_t *s1 /* align 8 */,
+ const uint8_t *s2 /* align 8 */,
+ int stride);
+} PixblockDSPContext;
+
+/* Generic initializer plus per-architecture overrides (each installs
+ * optimized versions of the function pointers when available). */
+void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx);
+void ff_pixblockdsp_init_alpha(PixblockDSPContext *c, AVCodecContext *avctx,
+ unsigned high_bit_depth);
+void ff_pixblockdsp_init_arm(PixblockDSPContext *c, AVCodecContext *avctx,
+ unsigned high_bit_depth);
+void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, AVCodecContext *avctx,
+ unsigned high_bit_depth);
+void ff_pixblockdsp_init_x86(PixblockDSPContext *c, AVCodecContext *avctx,
+ unsigned high_bit_depth);
+void ff_pixblockdsp_init_mips(PixblockDSPContext *c, AVCodecContext *avctx,
+ unsigned high_bit_depth);
+#endif /* AVCODEC_PIXBLOCKDSP_H */
diff --git a/media/ffvpx/libavcodec/profiles.c b/media/ffvpx/libavcodec/profiles.c
new file mode 100644
index 000000000..da745e139
--- /dev/null
+++ b/media/ffvpx/libavcodec/profiles.c
@@ -0,0 +1,131 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "avcodec.h"
+#include "profiles.h"
+
+#if !CONFIG_SMALL
+
+/* Profile-id -> human-readable-name tables, each terminated by
+ * FF_PROFILE_UNKNOWN. Lookups scan linearly and return the first match. */
+const AVProfile ff_aac_profiles[] = {
+ { FF_PROFILE_AAC_LOW, "LC" },
+ { FF_PROFILE_AAC_HE, "HE-AAC" },
+ { FF_PROFILE_AAC_HE_V2, "HE-AACv2" },
+ { FF_PROFILE_AAC_LD, "LD" },
+ { FF_PROFILE_AAC_ELD, "ELD" },
+ { FF_PROFILE_AAC_MAIN, "Main" },
+ /* NOTE(review): duplicate of the first entry; harmless (first match wins)
+ * but redundant — kept to match upstream. */
+ { FF_PROFILE_AAC_LOW, "LC" },
+ { FF_PROFILE_AAC_SSR, "SSR" },
+ { FF_PROFILE_AAC_LTP, "LTP" },
+ { FF_PROFILE_UNKNOWN },
+};
+
+const AVProfile ff_dca_profiles[] = {
+ { FF_PROFILE_DTS, "DTS" },
+ { FF_PROFILE_DTS_ES, "DTS-ES" },
+ { FF_PROFILE_DTS_96_24, "DTS 96/24" },
+ { FF_PROFILE_DTS_HD_HRA, "DTS-HD HRA" },
+ { FF_PROFILE_DTS_HD_MA, "DTS-HD MA" },
+ { FF_PROFILE_DTS_EXPRESS, "DTS Express" },
+ { FF_PROFILE_UNKNOWN },
+};
+
+const AVProfile ff_h264_profiles[] = {
+ { FF_PROFILE_H264_BASELINE, "Baseline" },
+ { FF_PROFILE_H264_CONSTRAINED_BASELINE, "Constrained Baseline" },
+ { FF_PROFILE_H264_MAIN, "Main" },
+ { FF_PROFILE_H264_EXTENDED, "Extended" },
+ { FF_PROFILE_H264_HIGH, "High" },
+ { FF_PROFILE_H264_HIGH_10, "High 10" },
+ { FF_PROFILE_H264_HIGH_10_INTRA, "High 10 Intra" },
+ { FF_PROFILE_H264_HIGH_422, "High 4:2:2" },
+ { FF_PROFILE_H264_HIGH_422_INTRA, "High 4:2:2 Intra" },
+ { FF_PROFILE_H264_HIGH_444, "High 4:4:4" },
+ { FF_PROFILE_H264_HIGH_444_PREDICTIVE, "High 4:4:4 Predictive" },
+ { FF_PROFILE_H264_HIGH_444_INTRA, "High 4:4:4 Intra" },
+ { FF_PROFILE_H264_CAVLC_444, "CAVLC 4:4:4" },
+ { FF_PROFILE_UNKNOWN },
+};
+
+const AVProfile ff_hevc_profiles[] = {
+ { FF_PROFILE_HEVC_MAIN, "Main" },
+ { FF_PROFILE_HEVC_MAIN_10, "Main 10" },
+ { FF_PROFILE_HEVC_MAIN_STILL_PICTURE, "Main Still Picture" },
+ { FF_PROFILE_HEVC_REXT, "Rext" },
+ { FF_PROFILE_UNKNOWN },
+};
+
+const AVProfile ff_jpeg2000_profiles[] = {
+ { FF_PROFILE_JPEG2000_CSTREAM_RESTRICTION_0, "JPEG 2000 codestream restriction 0" },
+ { FF_PROFILE_JPEG2000_CSTREAM_RESTRICTION_1, "JPEG 2000 codestream restriction 1" },
+ { FF_PROFILE_JPEG2000_CSTREAM_NO_RESTRICTION, "JPEG 2000 no codestream restrictions" },
+ { FF_PROFILE_JPEG2000_DCINEMA_2K, "JPEG 2000 digital cinema 2K" },
+ { FF_PROFILE_JPEG2000_DCINEMA_4K, "JPEG 2000 digital cinema 4K" },
+ { FF_PROFILE_UNKNOWN },
+};
+
+const AVProfile ff_mpeg2_video_profiles[] = {
+ { FF_PROFILE_MPEG2_422, "4:2:2" },
+ { FF_PROFILE_MPEG2_HIGH, "High" },
+ { FF_PROFILE_MPEG2_SS, "Spatially Scalable" },
+ { FF_PROFILE_MPEG2_SNR_SCALABLE, "SNR Scalable" },
+ { FF_PROFILE_MPEG2_MAIN, "Main" },
+ { FF_PROFILE_MPEG2_SIMPLE, "Simple" },
+ { FF_PROFILE_RESERVED, "Reserved" },
+ { FF_PROFILE_RESERVED, "Reserved" },
+ { FF_PROFILE_UNKNOWN },
+};
+
+const AVProfile ff_mpeg4_video_profiles[] = {
+ { FF_PROFILE_MPEG4_SIMPLE, "Simple Profile" },
+ { FF_PROFILE_MPEG4_SIMPLE_SCALABLE, "Simple Scalable Profile" },
+ { FF_PROFILE_MPEG4_CORE, "Core Profile" },
+ { FF_PROFILE_MPEG4_MAIN, "Main Profile" },
+ { FF_PROFILE_MPEG4_N_BIT, "N-bit Profile" },
+ { FF_PROFILE_MPEG4_SCALABLE_TEXTURE, "Scalable Texture Profile" },
+ { FF_PROFILE_MPEG4_SIMPLE_FACE_ANIMATION, "Simple Face Animation Profile" },
+ { FF_PROFILE_MPEG4_BASIC_ANIMATED_TEXTURE, "Basic Animated Texture Profile" },
+ { FF_PROFILE_MPEG4_HYBRID, "Hybrid Profile" },
+ { FF_PROFILE_MPEG4_ADVANCED_REAL_TIME, "Advanced Real Time Simple Profile" },
+ /* NOTE(review): "Code Scalable" is likely a typo for "Core Scalable";
+ * runtime string kept as-is to match upstream vendored source. */
+ { FF_PROFILE_MPEG4_CORE_SCALABLE, "Code Scalable Profile" },
+ { FF_PROFILE_MPEG4_ADVANCED_CODING, "Advanced Coding Profile" },
+ { FF_PROFILE_MPEG4_ADVANCED_CORE, "Advanced Core Profile" },
+ { FF_PROFILE_MPEG4_ADVANCED_SCALABLE_TEXTURE, "Advanced Scalable Texture Profile" },
+ { FF_PROFILE_MPEG4_SIMPLE_STUDIO, "Simple Studio Profile" },
+ { FF_PROFILE_MPEG4_ADVANCED_SIMPLE, "Advanced Simple Profile" },
+ { FF_PROFILE_UNKNOWN },
+};
+
+const AVProfile ff_vc1_profiles[] = {
+ { FF_PROFILE_VC1_SIMPLE, "Simple" },
+ { FF_PROFILE_VC1_MAIN, "Main" },
+ { FF_PROFILE_VC1_COMPLEX, "Complex" },
+ { FF_PROFILE_VC1_ADVANCED, "Advanced" },
+ { FF_PROFILE_UNKNOWN },
+};
+
+const AVProfile ff_vp9_profiles[] = {
+ { FF_PROFILE_VP9_0, "Profile 0" },
+ { FF_PROFILE_VP9_1, "Profile 1" },
+ { FF_PROFILE_VP9_2, "Profile 2" },
+ { FF_PROFILE_VP9_3, "Profile 3" },
+ { FF_PROFILE_UNKNOWN },
+};
+
+#endif /* !CONFIG_SMALL */
diff --git a/media/ffvpx/libavcodec/profiles.h b/media/ffvpx/libavcodec/profiles.h
new file mode 100644
index 000000000..c86c683ab
--- /dev/null
+++ b/media/ffvpx/libavcodec/profiles.h
@@ -0,0 +1,34 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_PROFILES_H
+#define AVCODEC_PROFILES_H
+
+#include "avcodec.h"
+
+/* Per-codec profile name tables defined in profiles.c; each array is
+ * terminated by an FF_PROFILE_UNKNOWN entry. */
+extern const AVProfile ff_aac_profiles[];
+extern const AVProfile ff_dca_profiles[];
+extern const AVProfile ff_h264_profiles[];
+extern const AVProfile ff_hevc_profiles[];
+extern const AVProfile ff_jpeg2000_profiles[];
+extern const AVProfile ff_mpeg2_video_profiles[];
+extern const AVProfile ff_mpeg4_video_profiles[];
+extern const AVProfile ff_vc1_profiles[];
+extern const AVProfile ff_vp9_profiles[];
+
+#endif /* AVCODEC_PROFILES_H */
diff --git a/media/ffvpx/libavcodec/pthread.c b/media/ffvpx/libavcodec/pthread.c
new file mode 100644
index 000000000..572471586
--- /dev/null
+++ b/media/ffvpx/libavcodec/pthread.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2004 Roman Shaposhnik
+ * Copyright (c) 2008 Alexander Strange (astrange@ithinksw.com)
+ *
+ * Many thanks to Steven M. Schultz for providing clever ideas and
+ * to Michael Niedermayer <michaelni@gmx.at> for writing initial
+ * implementation.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Multithreading support functions
+ * @see doc/multithreading.txt
+ */
+
+#include "avcodec.h"
+#include "internal.h"
+#include "pthread_internal.h"
+#include "thread.h"
+
+/**
+ * Set the threading algorithms used.
+ *
+ * Threading requires more than one thread.
+ * Frame threading requires entire frames to be passed to the codec,
+ * and introduces extra decoding delay, so is incompatible with low_delay,
+ * truncated input (AV_CODEC_FLAG_TRUNCATED) and chunked input
+ * (AV_CODEC_FLAG2_CHUNKS).
+ *
+ * @param avctx The context.
+ */
+static void validate_thread_parameters(AVCodecContext *avctx)
+{
+ int frame_threading_supported = (avctx->codec->capabilities & AV_CODEC_CAP_FRAME_THREADS)
+ && !(avctx->flags & AV_CODEC_FLAG_TRUNCATED)
+ && !(avctx->flags & AV_CODEC_FLAG_LOW_DELAY)
+ && !(avctx->flags2 & AV_CODEC_FLAG2_CHUNKS);
+ if (avctx->thread_count == 1) {
+ avctx->active_thread_type = 0;
+ } else if (frame_threading_supported && (avctx->thread_type & FF_THREAD_FRAME)) {
+ /* Frame threading is preferred over slice threading when both apply. */
+ avctx->active_thread_type = FF_THREAD_FRAME;
+ } else if (avctx->codec->capabilities & AV_CODEC_CAP_SLICE_THREADS &&
+ avctx->thread_type & FF_THREAD_SLICE) {
+ avctx->active_thread_type = FF_THREAD_SLICE;
+ } else if (!(avctx->codec->capabilities & AV_CODEC_CAP_AUTO_THREADS)) {
+ /* Codec cannot pick its own thread count and supports no threading. */
+ avctx->thread_count = 1;
+ avctx->active_thread_type = 0;
+ }
+
+ if (avctx->thread_count > MAX_AUTO_THREADS)
+ av_log(avctx, AV_LOG_WARNING,
+ "Application has requested %d threads. Using a thread count greater than %d is not recommended.\n",
+ avctx->thread_count, MAX_AUTO_THREADS);
+}
+
+/* Validate the requested threading mode, then initialize slice or frame
+ * threading accordingly. Returns 0 on success or when no threading applies. */
+int ff_thread_init(AVCodecContext *avctx)
+{
+ validate_thread_parameters(avctx);
+
+ if (avctx->active_thread_type&FF_THREAD_SLICE)
+ return ff_slice_thread_init(avctx);
+ else if (avctx->active_thread_type&FF_THREAD_FRAME)
+ return ff_frame_thread_init(avctx);
+
+ return 0;
+}
+
+/* Tear down whichever threading backend ff_thread_init() set up. */
+void ff_thread_free(AVCodecContext *avctx)
+{
+ if (avctx->active_thread_type&FF_THREAD_FRAME)
+ ff_frame_thread_free(avctx, avctx->thread_count);
+ else
+ ff_slice_thread_free(avctx);
+}
diff --git a/media/ffvpx/libavcodec/pthread_frame.c b/media/ffvpx/libavcodec/pthread_frame.c
new file mode 100644
index 000000000..a10fcbfe7
--- /dev/null
+++ b/media/ffvpx/libavcodec/pthread_frame.c
@@ -0,0 +1,900 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Frame multithreading support functions
+ * @see doc/multithreading.txt
+ */
+
+#include "config.h"
+
+#include <stdint.h>
+
+#include "avcodec.h"
+#include "internal.h"
+#include "pthread_internal.h"
+#include "thread.h"
+#include "version.h"
+
+#include "libavutil/avassert.h"
+#include "libavutil/buffer.h"
+#include "libavutil/common.h"
+#include "libavutil/cpu.h"
+#include "libavutil/frame.h"
+#include "libavutil/internal.h"
+#include "libavutil/log.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "libavutil/thread.h"
+
+#if defined(MOZ_TSAN)
+typedef _Atomic(int) atomic_int;
+#else
+/* NOTE(review): volatile provides neither atomicity nor ordering guarantees;
+ * the non-TSAN fallback appears to rely on the surrounding mutexes/conds for
+ * synchronization — confirm intended semantics. */
+typedef volatile int atomic_int;
+#endif
+
+/**
+ * Lifecycle states of one frame-decoding worker thread (stored in
+ * PerThreadContext.state). Note: this comment previously claimed to describe
+ * the thread context itself; the contexts are PerThreadContext /
+ * FrameThreadContext below, kept in AVCodecInternal thread_ctx.
+ */
+typedef enum {
+ STATE_INPUT_READY, ///< Set when the thread is awaiting a packet.
+ STATE_SETTING_UP, ///< Set before the codec has called ff_thread_finish_setup().
+ STATE_GET_BUFFER, /**<
+ * Set when the codec calls get_buffer().
+ * State is returned to STATE_SETTING_UP afterwards.
+ */
+ STATE_GET_FORMAT, /**<
+ * Set when the codec calls get_format().
+ * State is returned to STATE_SETTING_UP afterwards.
+ */
+ STATE_SETUP_FINISHED ///< Set after the codec has called ff_thread_finish_setup().
+} State;
+
+/**
+ * Per-worker-thread context for frame threading; one instance per thread,
+ * owned by the parent FrameThreadContext.
+ */
+typedef struct PerThreadContext {
+ struct FrameThreadContext *parent;
+
+ pthread_t thread;
+ int thread_init; ///< Nonzero once `thread` has been successfully created.
+ pthread_cond_t input_cond; ///< Used to wait for a new packet from the main thread.
+ pthread_cond_t progress_cond; ///< Used by child threads to wait for progress to change.
+ pthread_cond_t output_cond; ///< Used by the main thread to wait for frames to finish.
+
+ pthread_mutex_t mutex; ///< Mutex used to protect the contents of the PerThreadContext.
+ pthread_mutex_t progress_mutex; ///< Mutex used to protect frame progress values and progress_cond.
+
+ AVCodecContext *avctx; ///< Context used to decode packets passed to this thread.
+
+ AVPacket avpkt; ///< Input packet (for decoding) or output (for encoding).
+
+ AVFrame *frame; ///< Output frame (for decoding) or input (for encoding).
+ int got_frame; ///< The output of got_picture_ptr from the last avcodec_decode_video() call.
+ int result; ///< The result of the last codec decode/encode() call.
+
+ atomic_int state;
+
+ /**
+ * Array of frames passed to ff_thread_release_buffer().
+ * Frames are released after all threads referencing them are finished.
+ */
+ AVFrame *released_buffers;
+ int num_released_buffers;
+ int released_buffers_allocated;
+
+ AVFrame *requested_frame; ///< AVFrame the codec passed to get_buffer()
+ int requested_flags; ///< flags passed to get_buffer() for requested_frame
+
+ const enum AVPixelFormat *available_formats; ///< Format array for get_format()
+ enum AVPixelFormat result_format; ///< get_format() result
+
+ int die; ///< Set when the thread should exit.
+} PerThreadContext;
+
+/**
+ * Context stored in the client AVCodecInternal thread_ctx.
+ * Owns the PerThreadContext array and the round-robin scheduling state
+ * (next_decoding / next_finished).
+ */
+typedef struct FrameThreadContext {
+ PerThreadContext *threads; ///< The contexts for each thread.
+ PerThreadContext *prev_thread; ///< The last thread submit_packet() was called on.
+
+ pthread_mutex_t buffer_mutex; ///< Mutex used to protect get/release_buffer().
+
+ int next_decoding; ///< The next context to submit a packet to.
+ int next_finished; ///< The next context to return output from.
+
+ int delaying; /**<
+ * Set for the first N packets, where N is the number of threads.
+ * While it is set, ff_thread_en/decode_frame won't return any results.
+ */
+} FrameThreadContext;
+
+/* True when user callbacks may safely be invoked from worker threads: either
+ * the user declared them thread-safe, or the default get_buffer2 is in use. */
+#define THREAD_SAFE_CALLBACKS(avctx) \
+((avctx)->thread_safe_callbacks || (avctx)->get_buffer2 == avcodec_default_get_buffer2)
+
+/**
+ * Codec worker thread.
+ *
+ * Automatically calls ff_thread_finish_setup() if the codec does
+ * not provide an update_thread_context method, or if the codec returns
+ * before calling it.
+ */
+static attribute_align_arg void *frame_worker_thread(void *arg)
+{
+ PerThreadContext *p = arg;
+ AVCodecContext *avctx = p->avctx;
+ const AVCodec *codec = avctx->codec;
+
+ pthread_mutex_lock(&p->mutex);
+ while (1) {
+ /* Sleep until the main thread submits a packet or asks us to die. */
+ while (p->state == STATE_INPUT_READY && !p->die)
+ pthread_cond_wait(&p->input_cond, &p->mutex);
+
+ if (p->die) break;
+
+ if (!codec->update_thread_context && THREAD_SAFE_CALLBACKS(avctx))
+ ff_thread_finish_setup(avctx);
+
+ av_frame_unref(p->frame);
+ p->got_frame = 0;
+ p->result = codec->decode(avctx, p->frame, &p->got_frame, &p->avpkt);
+
+ /* Decoders must not leave buffers attached on failure; detect and warn. */
+ if ((p->result < 0 || !p->got_frame) && p->frame->buf[0]) {
+ if (avctx->internal->allocate_progress)
+ av_log(avctx, AV_LOG_ERROR, "A frame threaded decoder did not "
+ "free the frame on failure. This is a bug, please report it.\n");
+ av_frame_unref(p->frame);
+ }
+
+ /* Codec returned without ever calling ff_thread_finish_setup(). */
+ if (p->state == STATE_SETTING_UP) ff_thread_finish_setup(avctx);
+
+ pthread_mutex_lock(&p->progress_mutex);
+#if 0 //BUFREF-FIXME
+ for (i = 0; i < MAX_BUFFERS; i++)
+ if (p->progress_used[i] && (p->got_frame || p->result<0 || avctx->codec_id != AV_CODEC_ID_H264)) {
+ p->progress[i][0] = INT_MAX;
+ p->progress[i][1] = INT_MAX;
+ }
+#endif
+ p->state = STATE_INPUT_READY;
+
+ /* Wake both waiters-on-progress and the main thread awaiting output. */
+ pthread_cond_broadcast(&p->progress_cond);
+ pthread_cond_signal(&p->output_cond);
+ pthread_mutex_unlock(&p->progress_mutex);
+ }
+ pthread_mutex_unlock(&p->mutex);
+
+ return NULL;
+}
+
+/**
+ * Update the next thread's AVCodecContext with values from the reference thread's context.
+ *
+ * @param dst The destination context.
+ * @param src The source context.
+ * @param for_user 0 if the destination is a codec thread, 1 if the destination is the user's thread
+ * @return 0 on success, negative error code on failure
+ */
+static int update_context_from_thread(AVCodecContext *dst, AVCodecContext *src, int for_user)
+{
+ int err = 0;
+
+ if (dst != src) {
+ /* Propagate stream properties the decoder may have (re)derived. */
+ dst->time_base = src->time_base;
+ dst->framerate = src->framerate;
+ dst->width = src->width;
+ dst->height = src->height;
+ dst->pix_fmt = src->pix_fmt;
+
+ dst->coded_width = src->coded_width;
+ dst->coded_height = src->coded_height;
+
+ dst->has_b_frames = src->has_b_frames;
+ dst->idct_algo = src->idct_algo;
+
+ dst->bits_per_coded_sample = src->bits_per_coded_sample;
+ dst->sample_aspect_ratio = src->sample_aspect_ratio;
+#if FF_API_AFD
+FF_DISABLE_DEPRECATION_WARNINGS
+ dst->dtg_active_format = src->dtg_active_format;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif /* FF_API_AFD */
+
+ dst->profile = src->profile;
+ dst->level = src->level;
+
+ dst->bits_per_raw_sample = src->bits_per_raw_sample;
+ dst->ticks_per_frame = src->ticks_per_frame;
+ dst->color_primaries = src->color_primaries;
+
+ dst->color_trc = src->color_trc;
+ dst->colorspace = src->colorspace;
+ dst->color_range = src->color_range;
+ dst->chroma_sample_location = src->chroma_sample_location;
+
+ /* Hwaccel state is shared by pointer, not duplicated. */
+ dst->hwaccel = src->hwaccel;
+ dst->hwaccel_context = src->hwaccel_context;
+
+ dst->channels = src->channels;
+ dst->sample_rate = src->sample_rate;
+ dst->sample_fmt = src->sample_fmt;
+ dst->channel_layout = src->channel_layout;
+ dst->internal->hwaccel_priv_data = src->internal->hwaccel_priv_data;
+ }
+
+ if (for_user) {
+ /* The user sees a delay equal to the number of extra threads. */
+ dst->delay = src->thread_count - 1;
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
+ dst->coded_frame = src->coded_frame;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+ } else {
+ /* Let the codec copy its private per-thread decoding state. */
+ if (dst->codec->update_thread_context)
+ err = dst->codec->update_thread_context(dst, src);
+ }
+
+ return err;
+}
+
+/**
+ * Update the next thread's AVCodecContext with values set by the user.
+ *
+ * @param dst The destination context.
+ * @param src The source context.
+ * @return 0 on success, negative error code on failure
+ */
+static int update_context_from_user(AVCodecContext *dst, AVCodecContext *src)
+{
+/* memcpy the contiguous run of AVCodecContext members starting at 's' up to
+ * (but not including) 'e'; relies on the members' declaration order. */
+#define copy_fields(s, e) memcpy(&dst->s, &src->s, (char*)&dst->e - (char*)&dst->s);
+ dst->flags = src->flags;
+
+ dst->draw_horiz_band= src->draw_horiz_band;
+ dst->get_buffer2 = src->get_buffer2;
+
+ dst->opaque = src->opaque;
+ dst->debug = src->debug;
+ dst->debug_mv = src->debug_mv;
+
+ dst->slice_flags = src->slice_flags;
+ dst->flags2 = src->flags2;
+
+ copy_fields(skip_loop_filter, subtitle_header);
+
+ dst->frame_number = src->frame_number;
+ dst->reordered_opaque = src->reordered_opaque;
+ dst->thread_safe_callbacks = src->thread_safe_callbacks;
+
+ if (src->slice_count && src->slice_offset) {
+ /* Grow the destination slice_offset array before copying if needed. */
+ if (dst->slice_count < src->slice_count) {
+ int err = av_reallocp_array(&dst->slice_offset, src->slice_count,
+ sizeof(*dst->slice_offset));
+ if (err < 0)
+ return err;
+ }
+ memcpy(dst->slice_offset, src->slice_offset,
+ src->slice_count * sizeof(*dst->slice_offset));
+ }
+ dst->slice_count = src->slice_count;
+ return 0;
+#undef copy_fields
+}
+
+/// Releases the buffers that this decoding thread was the last user of.
+static void release_delayed_buffers(PerThreadContext *p)
+{
+ FrameThreadContext *fctx = p->parent;
+
+ /* Drain frames queued by ff_thread_release_buffer(); each unref is done
+ * under the shared buffer_mutex, re-taken per frame. */
+ while (p->num_released_buffers > 0) {
+ AVFrame *f;
+
+ pthread_mutex_lock(&fctx->buffer_mutex);
+
+ // fix extended data in case the caller screwed it up
+ av_assert0(p->avctx->codec_type == AVMEDIA_TYPE_VIDEO ||
+ p->avctx->codec_type == AVMEDIA_TYPE_AUDIO);
+ f = &p->released_buffers[--p->num_released_buffers];
+ f->extended_data = f->data;
+ av_frame_unref(f);
+
+ pthread_mutex_unlock(&fctx->buffer_mutex);
+ }
+}
+
+/**
+ * Hand a packet to worker thread p and wake it up.
+ *
+ * Waits for the previously submitted thread to finish its setup phase,
+ * propagates its context changes to p, and — when the user's callbacks are
+ * not thread-safe — services get_buffer()/get_format() requests from p on
+ * behalf of the user until setup is finished.
+ *
+ * @return 0 on success, negative error code on failure
+ */
+static int submit_packet(PerThreadContext *p, AVPacket *avpkt)
+{
+ FrameThreadContext *fctx = p->parent;
+ PerThreadContext *prev_thread = fctx->prev_thread;
+ const AVCodec *codec = p->avctx->codec;
+
+ /* An empty packet is only meaningful (flush) for codecs with delay. */
+ if (!avpkt->size && !(codec->capabilities & AV_CODEC_CAP_DELAY))
+ return 0;
+
+ pthread_mutex_lock(&p->mutex);
+
+ release_delayed_buffers(p);
+
+ if (prev_thread) {
+ int err;
+ /* Wait until the previous thread has left STATE_SETTING_UP so its
+ * context is stable enough to copy from. */
+ if (prev_thread->state == STATE_SETTING_UP) {
+ pthread_mutex_lock(&prev_thread->progress_mutex);
+ while (prev_thread->state == STATE_SETTING_UP)
+ pthread_cond_wait(&prev_thread->progress_cond, &prev_thread->progress_mutex);
+ pthread_mutex_unlock(&prev_thread->progress_mutex);
+ }
+
+ err = update_context_from_thread(p->avctx, prev_thread->avctx, 0);
+ if (err) {
+ pthread_mutex_unlock(&p->mutex);
+ return err;
+ }
+ }
+
+ /* Take our own reference on the packet; the caller's copy is untouched. */
+ av_packet_unref(&p->avpkt);
+ av_packet_ref(&p->avpkt, avpkt);
+
+ p->state = STATE_SETTING_UP;
+ pthread_cond_signal(&p->input_cond);
+ pthread_mutex_unlock(&p->mutex);
+
+ /*
+ * If the client doesn't have a thread-safe get_buffer(),
+ * then decoding threads call back to the main thread,
+ * and it calls back to the client here.
+ */
+
+ if (!p->avctx->thread_safe_callbacks && (
+ p->avctx->get_format != avcodec_default_get_format ||
+ p->avctx->get_buffer2 != avcodec_default_get_buffer2)) {
+ while (p->state != STATE_SETUP_FINISHED && p->state != STATE_INPUT_READY) {
+ int call_done = 1;
+ pthread_mutex_lock(&p->progress_mutex);
+ while (p->state == STATE_SETTING_UP)
+ pthread_cond_wait(&p->progress_cond, &p->progress_mutex);
+
+ /* Run the requested user callback here, on the main thread. */
+ State p_state = (State)p->state;
+ switch (p_state) {
+ case STATE_GET_BUFFER:
+ p->result = ff_get_buffer(p->avctx, p->requested_frame, p->requested_flags);
+ break;
+ case STATE_GET_FORMAT:
+ p->result_format = ff_get_format(p->avctx, p->available_formats);
+ break;
+ default:
+ call_done = 0;
+ break;
+ }
+ if (call_done) {
+ /* Hand the result back and let the worker continue setup. */
+ p->state = STATE_SETTING_UP;
+ pthread_cond_signal(&p->progress_cond);
+ }
+ pthread_mutex_unlock(&p->progress_mutex);
+ }
+ }
+
+ fctx->prev_thread = p;
+ fctx->next_decoding++;
+
+ return 0;
+}
+
+/**
+ * Frame-threading decode entry point: submit the packet to the next worker
+ * thread, then (once the initial pipeline delay has passed) return the
+ * oldest finished frame.
+ *
+ * @return the consumed packet size on success, or a negative error code
+ */
+int ff_thread_decode_frame(AVCodecContext *avctx,
+ AVFrame *picture, int *got_picture_ptr,
+ AVPacket *avpkt)
+{
+ FrameThreadContext *fctx = avctx->internal->thread_ctx;
+ int finished = fctx->next_finished;
+ PerThreadContext *p;
+ int err;
+
+ /*
+ * Submit a packet to the next decoding thread.
+ */
+
+ p = &fctx->threads[fctx->next_decoding];
+ err = update_context_from_user(p->avctx, avctx);
+ if (err) return err;
+ err = submit_packet(p, avpkt);
+ if (err) return err;
+
+ /*
+ * If we're still receiving the initial packets, don't return a frame.
+ */
+
+ if (fctx->next_decoding > (avctx->thread_count-1-(avctx->codec_id == AV_CODEC_ID_FFV1)))
+ fctx->delaying = 0;
+
+ if (fctx->delaying) {
+ *got_picture_ptr=0;
+ if (avpkt->size)
+ return avpkt->size;
+ }
+
+ /*
+ * Return the next available frame from the oldest thread.
+ * If we're at the end of the stream, then we have to skip threads that
+ * didn't output a frame, because we don't want to accidentally signal
+ * EOF (avpkt->size == 0 && *got_picture_ptr == 0).
+ */
+
+ do {
+ p = &fctx->threads[finished++];
+
+ /* Block until this worker has fully finished its packet. */
+ if (p->state != STATE_INPUT_READY) {
+ pthread_mutex_lock(&p->progress_mutex);
+ while (p->state != STATE_INPUT_READY)
+ pthread_cond_wait(&p->output_cond, &p->progress_mutex);
+ pthread_mutex_unlock(&p->progress_mutex);
+ }
+
+ av_frame_move_ref(picture, p->frame);
+ *got_picture_ptr = p->got_frame;
+ picture->pkt_dts = p->avpkt.dts;
+
+ if (p->result < 0)
+ err = p->result;
+
+ /*
+ * A later call with avkpt->size == 0 may loop over all threads,
+ * including this one, searching for a frame to return before being
+ * stopped by the "finished != fctx->next_finished" condition.
+ * Make sure we don't mistakenly return the same frame again.
+ */
+ p->got_frame = 0;
+
+ if (finished >= avctx->thread_count) finished = 0;
+ } while (!avpkt->size && !*got_picture_ptr && finished != fctx->next_finished);
+
+ /* Reflect the latest stream parameters in the user-facing context. */
+ update_context_from_thread(avctx, p->avctx, 1);
+
+ if (fctx->next_decoding >= avctx->thread_count) fctx->next_decoding = 0;
+
+ fctx->next_finished = finished;
+
+ /*
+ * When no frame was found while flushing, but an error occurred in
+ * any thread, return it instead of 0.
+ * Otherwise the error can get lost.
+ */
+ if (!avpkt->size && !*got_picture_ptr)
+ return err;
+
+ /* return the size of the consumed packet if no error occurred */
+ return (p->result >= 0) ? avpkt->size : p->result;
+}
+
+/* Record decoding progress value n for the given field of frame f and wake
+ * any threads waiting on it in ff_thread_await_progress(). */
+void ff_thread_report_progress(ThreadFrame *f, int n, int field)
+{
+ PerThreadContext *p;
+ atomic_int *progress = f->progress ? (atomic_int*)f->progress->data : NULL;
+
+ /* Fast path: no progress array, or this value was already reported. */
+ if (!progress || progress[field] >= n) return;
+
+ p = f->owner->internal->thread_ctx;
+
+ if (f->owner->debug&FF_DEBUG_THREADS)
+ av_log(f->owner, AV_LOG_DEBUG, "%p finished %d field %d\n", progress, n, field);
+
+ pthread_mutex_lock(&p->progress_mutex);
+ progress[field] = n;
+ pthread_cond_broadcast(&p->progress_cond);
+ pthread_mutex_unlock(&p->progress_mutex);
+}
+
+/* Block until the thread owning frame f has reported progress >= n for the
+ * given field via ff_thread_report_progress(). */
+void ff_thread_await_progress(ThreadFrame *f, int n, int field)
+{
+ PerThreadContext *p;
+ atomic_int *progress = f->progress ? (atomic_int*)f->progress->data : NULL;
+
+ /* Fast path: no progress array, or the target was already reached. */
+ if (!progress || progress[field] >= n) return;
+
+ p = f->owner->internal->thread_ctx;
+
+ if (f->owner->debug&FF_DEBUG_THREADS)
+ av_log(f->owner, AV_LOG_DEBUG, "thread awaiting %d field %d from %p\n", n, field, progress);
+
+ pthread_mutex_lock(&p->progress_mutex);
+ while (progress[field] < n)
+ pthread_cond_wait(&p->progress_cond, &p->progress_mutex);
+ pthread_mutex_unlock(&p->progress_mutex);
+}
+
+/* Called by a codec once its per-frame setup is done: moves the worker to
+ * STATE_SETUP_FINISHED and wakes threads waiting on progress_cond (e.g. the
+ * next submit_packet() call). No-op unless frame threading is active. */
+void ff_thread_finish_setup(AVCodecContext *avctx) {
+ PerThreadContext *p = avctx->internal->thread_ctx;
+
+ if (!(avctx->active_thread_type&FF_THREAD_FRAME)) return;
+
+ if(p->state == STATE_SETUP_FINISHED){
+ av_log(avctx, AV_LOG_WARNING, "Multiple ff_thread_finish_setup() calls\n");
+ }
+
+ pthread_mutex_lock(&p->progress_mutex);
+ p->state = STATE_SETUP_FINISHED;
+ pthread_cond_broadcast(&p->progress_cond);
+ pthread_mutex_unlock(&p->progress_mutex);
+}
+
+/// Waits for all threads to finish.
+/// Afterwards every worker is in STATE_INPUT_READY with got_frame cleared,
+/// so no stale frames can be returned by a later decode call.
+static void park_frame_worker_threads(FrameThreadContext *fctx, int thread_count)
+{
+ int i;
+
+ for (i = 0; i < thread_count; i++) {
+ PerThreadContext *p = &fctx->threads[i];
+
+ if (p->state != STATE_INPUT_READY) {
+ pthread_mutex_lock(&p->progress_mutex);
+ while (p->state != STATE_INPUT_READY)
+ pthread_cond_wait(&p->output_cond, &p->progress_mutex);
+ pthread_mutex_unlock(&p->progress_mutex);
+ }
+ p->got_frame = 0;
+ }
+}
+
+/* Shut down frame threading: park all workers, ask them to exit, join them,
+ * close each per-thread codec instance and free all associated state.
+ * thread_count may be smaller than avctx->thread_count when called from the
+ * ff_frame_thread_init() error path. */
+void ff_frame_thread_free(AVCodecContext *avctx, int thread_count)
+{
+ FrameThreadContext *fctx = avctx->internal->thread_ctx;
+ const AVCodec *codec = avctx->codec;
+ int i;
+
+ park_frame_worker_threads(fctx, thread_count);
+
+ /* Fold the last active thread's context back into thread 0 before
+ * closing; on failure, swap is_copy so ownership stays consistent. */
+ if (fctx->prev_thread && fctx->prev_thread != fctx->threads)
+ if (update_context_from_thread(fctx->threads->avctx, fctx->prev_thread->avctx, 0) < 0) {
+ av_log(avctx, AV_LOG_ERROR, "Final thread update failed\n");
+ fctx->prev_thread->avctx->internal->is_copy = fctx->threads->avctx->internal->is_copy;
+ fctx->threads->avctx->internal->is_copy = 1;
+ }
+
+ /* Pass 1: tell each worker to die, join it, close its codec instance. */
+ for (i = 0; i < thread_count; i++) {
+ PerThreadContext *p = &fctx->threads[i];
+
+ pthread_mutex_lock(&p->mutex);
+ p->die = 1;
+ pthread_cond_signal(&p->input_cond);
+ pthread_mutex_unlock(&p->mutex);
+
+ if (p->thread_init)
+ pthread_join(p->thread, NULL);
+ p->thread_init=0;
+
+ if (codec->close && p->avctx)
+ codec->close(p->avctx);
+
+ release_delayed_buffers(p);
+ av_frame_free(&p->frame);
+ }
+
+ /* Pass 2: with no thread running, destroy sync primitives and contexts. */
+ for (i = 0; i < thread_count; i++) {
+ PerThreadContext *p = &fctx->threads[i];
+
+ pthread_mutex_destroy(&p->mutex);
+ pthread_mutex_destroy(&p->progress_mutex);
+ pthread_cond_destroy(&p->input_cond);
+ pthread_cond_destroy(&p->progress_cond);
+ pthread_cond_destroy(&p->output_cond);
+ av_packet_unref(&p->avpkt);
+ av_freep(&p->released_buffers);
+
+ /* Thread 0 shares priv_data with the user context (see
+ * ff_frame_thread_init); only the other copies own theirs. */
+ if (i && p->avctx) {
+ av_freep(&p->avctx->priv_data);
+ av_freep(&p->avctx->slice_offset);
+ }
+
+ if (p->avctx)
+ av_freep(&p->avctx->internal);
+ av_freep(&p->avctx);
+ }
+
+ av_freep(&fctx->threads);
+ pthread_mutex_destroy(&fctx->buffer_mutex);
+ av_freep(&avctx->internal->thread_ctx);
+
+ if (avctx->priv_data && avctx->codec && avctx->codec->priv_class)
+ av_opt_free(avctx->priv_data);
+ avctx->codec = NULL;
+}
+
+/**
+ * Set up frame threading: allocate a FrameThreadContext with one
+ * PerThreadContext per thread, clone the user's AVCodecContext into each,
+ * and start the worker threads.
+ *
+ * @return 0 on success (or when threading is disabled), negative AVERROR on failure
+ */
+int ff_frame_thread_init(AVCodecContext *avctx)
+{
+ int thread_count = avctx->thread_count;
+ const AVCodec *codec = avctx->codec;
+ AVCodecContext *src = avctx;
+ FrameThreadContext *fctx;
+ int i, err = 0;
+
+#if HAVE_W32THREADS
+ w32thread_init();
+#endif
+
+ /* Auto-detect the thread count; visual debugging forces a single thread. */
+ if (!thread_count) {
+ int nb_cpus = av_cpu_count();
+ if ((avctx->debug & (FF_DEBUG_VIS_QP | FF_DEBUG_VIS_MB_TYPE)) || avctx->debug_mv)
+ nb_cpus = 1;
+ // use number of cores + 1 as thread count if there is more than one
+ if (nb_cpus > 1)
+ thread_count = avctx->thread_count = FFMIN(nb_cpus + 1, MAX_AUTO_THREADS);
+ else
+ thread_count = avctx->thread_count = 1;
+ }
+
+ if (thread_count <= 1) {
+ avctx->active_thread_type = 0;
+ return 0;
+ }
+
+ avctx->internal->thread_ctx = fctx = av_mallocz(sizeof(FrameThreadContext));
+ if (!fctx)
+ return AVERROR(ENOMEM);
+
+ fctx->threads = av_mallocz_array(thread_count, sizeof(PerThreadContext));
+ if (!fctx->threads) {
+ av_freep(&avctx->internal->thread_ctx);
+ return AVERROR(ENOMEM);
+ }
+
+ pthread_mutex_init(&fctx->buffer_mutex, NULL);
+ fctx->delaying = 1;
+
+ for (i = 0; i < thread_count; i++) {
+ AVCodecContext *copy = av_malloc(sizeof(AVCodecContext));
+ PerThreadContext *p = &fctx->threads[i];
+
+ pthread_mutex_init(&p->mutex, NULL);
+ pthread_mutex_init(&p->progress_mutex, NULL);
+ pthread_cond_init(&p->input_cond, NULL);
+ pthread_cond_init(&p->progress_cond, NULL);
+ pthread_cond_init(&p->output_cond, NULL);
+
+ p->frame = av_frame_alloc();
+ if (!p->frame) {
+ av_freep(&copy);
+ err = AVERROR(ENOMEM);
+ goto error;
+ }
+
+ p->parent = fctx;
+ p->avctx = copy;
+
+ if (!copy) {
+ err = AVERROR(ENOMEM);
+ goto error;
+ }
+
+ /* Shallow-copy the reference context, then give the clone its own
+ * AVCodecInternal pointing back at this PerThreadContext. */
+ *copy = *src;
+
+ copy->internal = av_malloc(sizeof(AVCodecInternal));
+ if (!copy->internal) {
+ copy->priv_data = NULL;
+ err = AVERROR(ENOMEM);
+ goto error;
+ }
+ *copy->internal = *src->internal;
+ copy->internal->thread_ctx = p;
+ copy->internal->pkt = &p->avpkt;
+
+ if (!i) {
+ /* The first copy becomes the reference context: run the real
+ * codec init on it (it shares priv_data with the user context). */
+ src = copy;
+
+ if (codec->init)
+ err = codec->init(copy);
+
+ update_context_from_thread(avctx, copy, 1);
+ } else {
+ /* Later copies duplicate the reference context's private data. */
+ copy->priv_data = av_malloc(codec->priv_data_size);
+ if (!copy->priv_data) {
+ err = AVERROR(ENOMEM);
+ goto error;
+ }
+ memcpy(copy->priv_data, src->priv_data, codec->priv_data_size);
+ copy->internal->is_copy = 1;
+
+ if (codec->init_thread_copy)
+ err = codec->init_thread_copy(copy);
+ }
+
+ if (err) goto error;
+
+ err = AVERROR(pthread_create(&p->thread, NULL, frame_worker_thread, p));
+ p->thread_init= !err;
+ if(!p->thread_init)
+ goto error;
+ }
+
+ return 0;
+
+error:
+ /* Free only the contexts initialized so far (i+1 includes the failing one). */
+ ff_frame_thread_free(avctx, i+1);
+
+ return err;
+}
+
+/* Flush frame threading: park all workers, reset the submission/output
+ * cursors and delay state, and run the codec's flush on every per-thread
+ * context. */
+void ff_thread_flush(AVCodecContext *avctx)
+{
+ int i;
+ FrameThreadContext *fctx = avctx->internal->thread_ctx;
+
+ if (!fctx) return;
+
+ park_frame_worker_threads(fctx, avctx->thread_count);
+ /* Pull the last active thread's context changes into thread 0 before
+ * the pipeline restarts. */
+ if (fctx->prev_thread) {
+ if (fctx->prev_thread != &fctx->threads[0])
+ update_context_from_thread(fctx->threads[0].avctx, fctx->prev_thread->avctx, 0);
+ }
+
+ fctx->next_decoding = fctx->next_finished = 0;
+ fctx->delaying = 1;
+ fctx->prev_thread = NULL;
+ for (i = 0; i < avctx->thread_count; i++) {
+ PerThreadContext *p = &fctx->threads[i];
+ // Make sure decode flush calls with size=0 won't return old frames
+ p->got_frame = 0;
+ av_frame_unref(p->frame);
+
+ release_delayed_buffers(p);
+
+ if (avctx->codec->flush)
+ avctx->codec->flush(p->avctx);
+ }
+}
+
+/* Return 1 if the codec may start a new frame now, 0 if not: under frame
+ * threading, once the worker has left STATE_SETTING_UP, codecs that need
+ * serialized setup (they have update_thread_context, or the user callbacks
+ * are not thread-safe) must not begin another frame. */
+int ff_thread_can_start_frame(AVCodecContext *avctx)
+{
+ PerThreadContext *p = avctx->internal->thread_ctx;
+ if ((avctx->active_thread_type&FF_THREAD_FRAME) && p->state != STATE_SETTING_UP &&
+ (avctx->codec->update_thread_context || !THREAD_SAFE_CALLBACKS(avctx))) {
+ return 0;
+ }
+ return 1;
+}
+
+/**
+ * Allocate a frame buffer for a worker thread.
+ *
+ * Without frame threading this is a plain ff_get_buffer(). Otherwise the
+ * call is either made directly (thread-safe callbacks / default get_buffer2)
+ * or handed to the main thread via STATE_GET_BUFFER, which services it in
+ * submit_packet().
+ *
+ * @return 0 on success, a negative value on failure
+ */
+static int thread_get_buffer_internal(AVCodecContext *avctx, ThreadFrame *f, int flags)
+{
+ PerThreadContext *p = avctx->internal->thread_ctx;
+ int err;
+
+ f->owner = avctx;
+
+ ff_init_buffer_info(avctx, f->f);
+
+ if (!(avctx->active_thread_type & FF_THREAD_FRAME))
+ return ff_get_buffer(avctx, f->f, flags);
+
+ if (p->state != STATE_SETTING_UP &&
+ (avctx->codec->update_thread_context || !THREAD_SAFE_CALLBACKS(avctx))) {
+ av_log(avctx, AV_LOG_ERROR, "get_buffer() cannot be called after ff_thread_finish_setup()\n");
+ return -1;
+ }
+
+ /* Allocate the progress array (two entries, e.g. one per field),
+ * initialized to -1 meaning "nothing decoded yet". */
+ if (avctx->internal->allocate_progress) {
+ int *progress;
+ f->progress = av_buffer_alloc(2 * sizeof(int));
+ if (!f->progress) {
+ return AVERROR(ENOMEM);
+ }
+ progress = (int*)f->progress->data;
+
+ progress[0] = progress[1] = -1;
+ }
+
+ pthread_mutex_lock(&p->parent->buffer_mutex);
+ if (avctx->thread_safe_callbacks ||
+ avctx->get_buffer2 == avcodec_default_get_buffer2) {
+ err = ff_get_buffer(avctx, f->f, flags);
+ } else {
+ /* Ask the main thread to run the user's get_buffer2 for us and
+ * wait for the result. */
+ pthread_mutex_lock(&p->progress_mutex);
+ p->requested_frame = f->f;
+ p->requested_flags = flags;
+ p->state = STATE_GET_BUFFER;
+ pthread_cond_broadcast(&p->progress_cond);
+
+ while (p->state != STATE_SETTING_UP)
+ pthread_cond_wait(&p->progress_cond, &p->progress_mutex);
+
+ err = p->result;
+
+ pthread_mutex_unlock(&p->progress_mutex);
+
+ }
+ if (!THREAD_SAFE_CALLBACKS(avctx) && !avctx->codec->update_thread_context)
+ ff_thread_finish_setup(avctx);
+ if (err)
+ av_buffer_unref(&f->progress);
+
+ pthread_mutex_unlock(&p->parent->buffer_mutex);
+
+ return err;
+}
+
+/* Frame-threading wrapper around ff_get_format(): call it directly when that
+ * is safe, otherwise hand the request to the main thread via
+ * STATE_GET_FORMAT (serviced in submit_packet()) and wait for the result. */
+enum AVPixelFormat ff_thread_get_format(AVCodecContext *avctx, const enum AVPixelFormat *fmt)
+{
+ enum AVPixelFormat res;
+ PerThreadContext *p = avctx->internal->thread_ctx;
+ if (!(avctx->active_thread_type & FF_THREAD_FRAME) || avctx->thread_safe_callbacks ||
+ avctx->get_format == avcodec_default_get_format)
+ return ff_get_format(avctx, fmt);
+ if (p->state != STATE_SETTING_UP) {
+ av_log(avctx, AV_LOG_ERROR, "get_format() cannot be called after ff_thread_finish_setup()\n");
+ return -1;
+ }
+ pthread_mutex_lock(&p->progress_mutex);
+ p->available_formats = fmt;
+ p->state = STATE_GET_FORMAT;
+ pthread_cond_broadcast(&p->progress_cond);
+
+ while (p->state != STATE_SETTING_UP)
+ pthread_cond_wait(&p->progress_cond, &p->progress_mutex);
+
+ res = p->result_format;
+
+ pthread_mutex_unlock(&p->progress_mutex);
+
+ return res;
+}
+
+/* Public wrapper around thread_get_buffer_internal() that logs failures. */
+int ff_thread_get_buffer(AVCodecContext *avctx, ThreadFrame *f, int flags)
+{
+ int ret = thread_get_buffer_internal(avctx, f, flags);
+ if (ret < 0)
+ av_log(avctx, AV_LOG_ERROR, "thread_get_buffer() failed\n");
+ return ret;
+}
+
+/* Release a ThreadFrame's buffer. When the user's callbacks may not be
+ * called from a worker thread, the frame is not freed here but queued on
+ * released_buffers, to be unreferenced later by release_delayed_buffers()
+ * on the owning thread. */
+void ff_thread_release_buffer(AVCodecContext *avctx, ThreadFrame *f)
+{
+ PerThreadContext *p = avctx->internal->thread_ctx;
+ FrameThreadContext *fctx;
+ AVFrame *dst, *tmp;
+ /* Direct free is safe without frame threading, with thread-safe
+ * callbacks, or when the default get_buffer2 is in use. */
+ int can_direct_free = !(avctx->active_thread_type & FF_THREAD_FRAME) ||
+ avctx->thread_safe_callbacks ||
+ avctx->get_buffer2 == avcodec_default_get_buffer2;
+
+ if (!f->f || !f->f->buf[0])
+ return;
+
+ if (avctx->debug & FF_DEBUG_BUFFERS)
+ av_log(avctx, AV_LOG_DEBUG, "thread_release_buffer called on pic %p\n", f);
+
+ av_buffer_unref(&f->progress);
+ f->owner = NULL;
+
+ if (can_direct_free) {
+ av_frame_unref(f->f);
+ return;
+ }
+
+ fctx = p->parent;
+ pthread_mutex_lock(&fctx->buffer_mutex);
+
+ /* Guard the size computation below against integer overflow; on any
+ * failure the frame is silently dropped (queue unchanged). */
+ if (p->num_released_buffers + 1 >= INT_MAX / sizeof(*p->released_buffers))
+ goto fail;
+ tmp = av_fast_realloc(p->released_buffers, &p->released_buffers_allocated,
+ (p->num_released_buffers + 1) *
+ sizeof(*p->released_buffers));
+ if (!tmp)
+ goto fail;
+ p->released_buffers = tmp;
+
+ dst = &p->released_buffers[p->num_released_buffers];
+ av_frame_move_ref(dst, f->f);
+
+ p->num_released_buffers++;
+
+fail:
+ pthread_mutex_unlock(&fctx->buffer_mutex);
+}
diff --git a/media/ffvpx/libavcodec/pthread_internal.h b/media/ffvpx/libavcodec/pthread_internal.h
new file mode 100644
index 000000000..d2115cbba
--- /dev/null
+++ b/media/ffvpx/libavcodec/pthread_internal.h
@@ -0,0 +1,34 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_PTHREAD_INTERNAL_H
+#define AVCODEC_PTHREAD_INTERNAL_H
+
+#include "avcodec.h"
+
+/* H.264 slice threading seems to be buggy with more than 16 threads,
+ * limit the number of threads to 16 for automatic detection */
+#define MAX_AUTO_THREADS 16
+
+int ff_slice_thread_init(AVCodecContext *avctx);
+void ff_slice_thread_free(AVCodecContext *avctx);
+
+int ff_frame_thread_init(AVCodecContext *avctx);
+void ff_frame_thread_free(AVCodecContext *avctx, int thread_count);
+
+#endif // AVCODEC_PTHREAD_INTERNAL_H
diff --git a/media/ffvpx/libavcodec/pthread_slice.c b/media/ffvpx/libavcodec/pthread_slice.c
new file mode 100644
index 000000000..96a7643f6
--- /dev/null
+++ b/media/ffvpx/libavcodec/pthread_slice.c
@@ -0,0 +1,314 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Slice multithreading support functions
+ * @see doc/multithreading.txt
+ */
+
+#include "config.h"
+
+#include "avcodec.h"
+#include "internal.h"
+#include "pthread_internal.h"
+#include "thread.h"
+
+#include "libavutil/avassert.h"
+#include "libavutil/common.h"
+#include "libavutil/cpu.h"
+#include "libavutil/mem.h"
+#include "libavutil/thread.h"
+
+/* Job callbacks: func receives one argument slice, func2 additionally gets
+ * the job and worker-thread numbers. */
+typedef int (action_func)(AVCodecContext *c, void *arg);
+typedef int (action_func2)(AVCodecContext *c, void *arg, int jobnr, int threadnr);
+
+typedef struct SliceThreadContext {
+ pthread_t *workers; ///< worker thread handles
+ action_func *func; ///< per-job callback for execute()
+ action_func2 *func2; ///< per-job callback for execute2() (used when func is NULL)
+ void *args; ///< job argument array (job_size bytes per job for func)
+ int *rets; ///< optional per-job return values
+ int job_count;
+ int job_size;
+
+ pthread_cond_t last_job_cond; ///< signaled when the last job of a batch finishes
+ pthread_cond_t current_job_cond; ///< signaled when a new batch is submitted
+ pthread_mutex_t current_job_lock; ///< protects the fields below
+ unsigned current_execute; ///< batch counter, bumped per execute() call
+ int current_job; ///< next job index to hand out
+ int done; ///< set to make workers exit
+
+ int *entries; ///< per-row progress counters (report/await_progress2)
+ int entries_count;
+ int thread_count;
+ pthread_cond_t *progress_cond; ///< one condition variable per thread
+ pthread_mutex_t *progress_mutex; ///< one mutex per thread
+} SliceThreadContext;
+
+/* Slice worker main loop: sleep until a batch is submitted, then repeatedly
+ * grab the next job index and run func/func2 on it until the batch is
+ * exhausted; exit when c->done is set. */
+static void* attribute_align_arg worker(void *v)
+{
+ AVCodecContext *avctx = v;
+ SliceThreadContext *c = avctx->internal->thread_ctx;
+ unsigned last_execute = 0;
+ int our_job = c->job_count;
+ int thread_count = avctx->thread_count;
+ int self_id;
+
+ pthread_mutex_lock(&c->current_job_lock);
+ /* Each worker claims a unique id from current_job at startup. */
+ self_id = c->current_job++;
+ for (;;){
+ int ret;
+ while (our_job >= c->job_count) {
+ /* current_job reaches thread_count + job_count once every job is
+ * done and every worker is parked: wake thread_park_workers(). */
+ if (c->current_job == thread_count + c->job_count)
+ pthread_cond_signal(&c->last_job_cond);
+
+ /* Sleep until a new batch (current_execute bump) or shutdown. */
+ while (last_execute == c->current_execute && !c->done)
+ pthread_cond_wait(&c->current_job_cond, &c->current_job_lock);
+ last_execute = c->current_execute;
+ our_job = self_id;
+
+ if (c->done) {
+ pthread_mutex_unlock(&c->current_job_lock);
+ return NULL;
+ }
+ }
+ pthread_mutex_unlock(&c->current_job_lock);
+
+ /* Run the job outside the lock. */
+ ret = c->func ? c->func(avctx, (char*)c->args + our_job*c->job_size):
+ c->func2(avctx, c->args, our_job, self_id);
+ if (c->rets)
+ c->rets[our_job%c->job_count] = ret;
+
+ pthread_mutex_lock(&c->current_job_lock);
+ our_job = c->current_job++;
+ }
+}
+
+/* Shut down slice threading: set done, wake every worker (including any
+ * blocked in await_progress2), join them all, then destroy and free all
+ * synchronization objects and arrays. */
+void ff_slice_thread_free(AVCodecContext *avctx)
+{
+ SliceThreadContext *c = avctx->internal->thread_ctx;
+ int i;
+
+ pthread_mutex_lock(&c->current_job_lock);
+ c->done = 1;
+ pthread_cond_broadcast(&c->current_job_cond);
+ for (i = 0; i < c->thread_count; i++)
+ pthread_cond_broadcast(&c->progress_cond[i]);
+ pthread_mutex_unlock(&c->current_job_lock);
+
+ for (i=0; i<avctx->thread_count; i++)
+ pthread_join(c->workers[i], NULL);
+
+ for (i = 0; i < c->thread_count; i++) {
+ pthread_mutex_destroy(&c->progress_mutex[i]);
+ pthread_cond_destroy(&c->progress_cond[i]);
+ }
+
+ pthread_mutex_destroy(&c->current_job_lock);
+ pthread_cond_destroy(&c->current_job_cond);
+ pthread_cond_destroy(&c->last_job_cond);
+
+ av_freep(&c->entries);
+ av_freep(&c->progress_mutex);
+ av_freep(&c->progress_cond);
+
+ av_freep(&c->workers);
+ av_freep(&avctx->internal->thread_ctx);
+}
+
+/* Wait (current_job_lock must be held) until every worker has finished the
+ * current batch and parked itself again; returns with the lock released. */
+static av_always_inline void thread_park_workers(SliceThreadContext *c, int thread_count)
+{
+ while (c->current_job != thread_count + c->job_count)
+ pthread_cond_wait(&c->last_job_cond, &c->current_job_lock);
+ pthread_mutex_unlock(&c->current_job_lock);
+}
+
+/* avctx->execute implementation: run job_count jobs of func across the
+ * worker pool and block until all have completed. Falls back to the default
+ * serial execute when slice threading is not active. */
+static int thread_execute(AVCodecContext *avctx, action_func* func, void *arg, int *ret, int job_count, int job_size)
+{
+ SliceThreadContext *c = avctx->internal->thread_ctx;
+
+ if (!(avctx->active_thread_type&FF_THREAD_SLICE) || avctx->thread_count <= 1)
+ return avcodec_default_execute(avctx, func, arg, ret, job_count, job_size);
+
+ if (job_count <= 0)
+ return 0;
+
+ pthread_mutex_lock(&c->current_job_lock);
+
+ /* Publish the batch: workers take indices self_id..job_count-1, and
+ * current_job starts at thread_count so completion is detected at
+ * thread_count + job_count (see worker/thread_park_workers). */
+ c->current_job = avctx->thread_count;
+ c->job_count = job_count;
+ c->job_size = job_size;
+ c->args = arg;
+ c->func = func;
+ if (ret) {
+ c->rets = ret;
+ } else {
+ c->rets = NULL;
+ }
+ c->current_execute++;
+ pthread_cond_broadcast(&c->current_job_cond);
+
+ thread_park_workers(c, avctx->thread_count);
+
+ return 0;
+}
+
+/* avctx->execute2 implementation: like thread_execute, but workers invoke
+ * func2 (which also receives job and thread numbers); func is passed as
+ * NULL so worker() dispatches to func2. */
+static int thread_execute2(AVCodecContext *avctx, action_func2* func2, void *arg, int *ret, int job_count)
+{
+ SliceThreadContext *c = avctx->internal->thread_ctx;
+ c->func2 = func2;
+ return thread_execute(avctx, NULL, arg, ret, job_count, 0);
+}
+
+/**
+ * Set up slice threading: determine the thread count, spawn the worker pool,
+ * and install thread_execute/thread_execute2 as avctx->execute/execute2.
+ *
+ * @return 0 on success (or when threading is disabled), -1 on failure
+ */
+int ff_slice_thread_init(AVCodecContext *avctx)
+{
+ int i;
+ SliceThreadContext *c;
+ int thread_count = avctx->thread_count;
+
+#if HAVE_W32THREADS
+ w32thread_init();
+#endif
+
+ // We cannot do this in the encoder init as the threads are created before
+ if (av_codec_is_encoder(avctx->codec) &&
+ avctx->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
+ avctx->height > 2800)
+ thread_count = avctx->thread_count = 1;
+
+ /* Auto-detect the thread count, capped by the number of 16-pixel rows. */
+ if (!thread_count) {
+ int nb_cpus = av_cpu_count();
+ if (avctx->height)
+ nb_cpus = FFMIN(nb_cpus, (avctx->height+15)/16);
+ // use number of cores + 1 as thread count if there is more than one
+ if (nb_cpus > 1)
+ thread_count = avctx->thread_count = FFMIN(nb_cpus + 1, MAX_AUTO_THREADS);
+ else
+ thread_count = avctx->thread_count = 1;
+ }
+
+ if (thread_count <= 1) {
+ avctx->active_thread_type = 0;
+ return 0;
+ }
+
+ c = av_mallocz(sizeof(SliceThreadContext));
+ if (!c)
+ return -1;
+
+ c->workers = av_mallocz_array(thread_count, sizeof(pthread_t));
+ if (!c->workers) {
+ av_free(c);
+ return -1;
+ }
+
+ avctx->internal->thread_ctx = c;
+ c->current_job = 0;
+ c->job_count = 0;
+ c->job_size = 0;
+ c->done = 0;
+ pthread_cond_init(&c->current_job_cond, NULL);
+ pthread_cond_init(&c->last_job_cond, NULL);
+ pthread_mutex_init(&c->current_job_lock, NULL);
+ /* Hold the lock while spawning so workers block until parked below. */
+ pthread_mutex_lock(&c->current_job_lock);
+ for (i=0; i<thread_count; i++) {
+ if(pthread_create(&c->workers[i], NULL, worker, avctx)) {
+ /* Record how many threads actually started so cleanup joins
+ * only those. */
+ avctx->thread_count = i;
+ pthread_mutex_unlock(&c->current_job_lock);
+ ff_thread_free(avctx);
+ return -1;
+ }
+ }
+
+ thread_park_workers(c, thread_count);
+
+ avctx->execute = thread_execute;
+ avctx->execute2 = thread_execute2;
+ return 0;
+}
+
+/* Advance this thread's progress counter for the given field by n and wake
+ * any thread waiting on it in ff_thread_await_progress2(). */
+void ff_thread_report_progress2(AVCodecContext *avctx, int field, int thread, int n)
+{
+ SliceThreadContext *p = avctx->internal->thread_ctx;
+ int *entries = p->entries;
+
+ pthread_mutex_lock(&p->progress_mutex[thread]);
+ entries[field] +=n;
+ pthread_cond_signal(&p->progress_cond[thread]);
+ pthread_mutex_unlock(&p->progress_mutex[thread]);
+}
+
+/* Block until the previous thread's progress on field-1 is at least `shift`
+ * ahead of this field's counter. No-op for the first field or when no
+ * entries array was allocated. */
+void ff_thread_await_progress2(AVCodecContext *avctx, int field, int thread, int shift)
+{
+ SliceThreadContext *p = avctx->internal->thread_ctx;
+ int *entries = p->entries;
+
+ if (!entries || !field) return;
+
+ /* Wait on the preceding thread's condition variable (wrapping around). */
+ thread = thread ? thread - 1 : p->thread_count - 1;
+
+ pthread_mutex_lock(&p->progress_mutex[thread]);
+ while ((entries[field - 1] - entries[field]) < shift){
+ pthread_cond_wait(&p->progress_cond[thread], &p->progress_mutex[thread]);
+ }
+ pthread_mutex_unlock(&p->progress_mutex[thread]);
+}
+
+/**
+ * Allocate `count` zeroed progress entries plus one mutex/condition pair per
+ * thread, for use by ff_thread_report_progress2()/ff_thread_await_progress2().
+ * No-op unless slice threading is active.
+ *
+ * @return 0 on success, AVERROR(ENOMEM) on allocation failure
+ */
+int ff_alloc_entries(AVCodecContext *avctx, int count)
+{
+ int i;
+
+ if (avctx->active_thread_type & FF_THREAD_SLICE) {
+ SliceThreadContext *p = avctx->internal->thread_ctx;
+
+ /* Re-allocation with a different thread count is not supported. */
+ if (p->entries) {
+ av_assert0(p->thread_count == avctx->thread_count);
+ av_freep(&p->entries);
+ }
+
+ p->thread_count = avctx->thread_count;
+ p->entries = av_mallocz_array(count, sizeof(int));
+
+ /* The mutex/cond arrays survive across calls; allocate them once. */
+ if (!p->progress_mutex) {
+ p->progress_mutex = av_malloc_array(p->thread_count, sizeof(pthread_mutex_t));
+ p->progress_cond = av_malloc_array(p->thread_count, sizeof(pthread_cond_t));
+ }
+
+ if (!p->entries || !p->progress_mutex || !p->progress_cond) {
+ av_freep(&p->entries);
+ av_freep(&p->progress_mutex);
+ av_freep(&p->progress_cond);
+ return AVERROR(ENOMEM);
+ }
+ p->entries_count = count;
+
+ for (i = 0; i < p->thread_count; i++) {
+ pthread_mutex_init(&p->progress_mutex[i], NULL);
+ pthread_cond_init(&p->progress_cond[i], NULL);
+ }
+ }
+
+ return 0;
+}
+
+/* Reset all inter-thread progress counters to 0. */
+void ff_reset_entries(AVCodecContext *avctx)
+{
+ SliceThreadContext *p = avctx->internal->thread_ctx;
+ memset(p->entries, 0, p->entries_count * sizeof(int));
+}
diff --git a/media/ffvpx/libavcodec/put_bits.h b/media/ffvpx/libavcodec/put_bits.h
new file mode 100644
index 000000000..68ed39119
--- /dev/null
+++ b/media/ffvpx/libavcodec/put_bits.h
@@ -0,0 +1,268 @@
+/*
+ * copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * bitstream writer API
+ */
+
+#ifndef AVCODEC_PUT_BITS_H
+#define AVCODEC_PUT_BITS_H
+
+#include <limits.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/intreadwrite.h"
+
+typedef struct PutBitContext {
+    uint32_t bit_buf;     ///< bits accumulated but not yet written to buf
+    int bit_left;         ///< free bits remaining in bit_buf (32 == empty)
+    uint8_t *buf, *buf_ptr, *buf_end; ///< start, current write position, end of output buffer
+    int size_in_bits;     ///< total buffer capacity in bits
+} PutBitContext;
+
+/**
+ * Initialize the PutBitContext s over an output buffer.
+ *
+ * @param buffer the buffer where to put bits (may be NULL if buffer_size is 0)
+ * @param buffer_size the size in bytes of buffer; negative sizes are
+ *                    treated as "no buffer"
+ */
+static inline void init_put_bits(PutBitContext *s, uint8_t *buffer,
+                                 int buffer_size)
+{
+    /* Normalize a negative size to an empty, NULL buffer. */
+    if (buffer_size < 0) {
+        buffer      = NULL;
+        buffer_size = 0;
+    }
+
+    s->buf          = buffer;
+    s->buf_ptr      = buffer;
+    s->buf_end      = buffer + buffer_size;
+    s->size_in_bits = 8 * buffer_size;
+    s->bit_buf      = 0;
+    s->bit_left     = 32;
+}
+
+/**
+ * Rebase the bit writer onto a reallocated buffer.
+ *
+ * @param buffer the buffer where to put bits
+ * @param buffer_size the size in bytes of buffer,
+ *                    must be larger than the previous size (asserted)
+ */
+static inline void rebase_put_bits(PutBitContext *s, uint8_t *buffer,
+                                   int buffer_size)
+{
+    /* Preserve the write offset while switching to the new storage. */
+    ptrdiff_t used = s->buf_ptr - s->buf;
+
+    av_assert0(8 * buffer_size > s->size_in_bits);
+
+    s->buf          = buffer;
+    s->buf_ptr      = buffer + used;
+    s->buf_end      = buffer + buffer_size;
+    s->size_in_bits = 8 * buffer_size;
+}
+
+/**
+ * @return the total number of bits written to the bitstream so far
+ *         (committed bytes plus bits still pending in bit_buf).
+ */
+static inline int put_bits_count(PutBitContext *s)
+{
+    return 8 * (int)(s->buf_ptr - s->buf) + (32 - s->bit_left);
+}
+
+/**
+ * @return the number of bits still available in the bitstream buffer
+ *         (remaining bytes, adjusted for the partially filled bit_buf).
+ */
+static inline int put_bits_left(PutBitContext* s)
+{
+    return 8 * (int)(s->buf_end - s->buf_ptr) + s->bit_left - 32;
+}
+
+/**
+ * Pad the end of the output stream with zeros and write out any bits
+ * still pending in bit_buf, leaving the context byte-aligned and empty.
+ */
+static inline void flush_put_bits(PutBitContext *s)
+{
+#ifndef BITSTREAM_WRITER_LE
+    /* Big-endian writer: move the pending bits up to the MSB so the
+     * byte-wise loop below can emit them from the top. */
+    if (s->bit_left < 32)
+        s->bit_buf <<= s->bit_left;
+#endif
+    /* Drain bit_buf one byte at a time; the tail is implicitly
+     * zero-padded by the shifts. */
+    while (s->bit_left < 32) {
+        av_assert0(s->buf_ptr < s->buf_end);
+#ifdef BITSTREAM_WRITER_LE
+        *s->buf_ptr++ = s->bit_buf;
+        s->bit_buf >>= 8;
+#else
+        *s->buf_ptr++ = s->bit_buf >> 24;
+        s->bit_buf <<= 8;
+#endif
+        s->bit_left += 8;
+    }
+    /* Reset to the empty state: next write starts a fresh 32-bit word. */
+    s->bit_left = 32;
+    s->bit_buf = 0;
+}
+
+#ifdef BITSTREAM_WRITER_LE
+#define avpriv_align_put_bits align_put_bits_unsupported_here
+#define avpriv_put_string ff_put_string_unsupported_here
+#define avpriv_copy_bits avpriv_copy_bits_unsupported_here
+#else
+/**
+ * Pad the bitstream with zeros up to the next byte boundary.
+ */
+void avpriv_align_put_bits(PutBitContext *s);
+
+/**
+ * Put the string string in the bitstream.
+ *
+ * @param terminate_string 0-terminates the written string if value is 1
+ */
+void avpriv_put_string(PutBitContext *pb, const char *string,
+ int terminate_string);
+
+/**
+ * Copy the content of src to the bitstream.
+ *
+ * @param length the number of bits of src to copy
+ */
+void avpriv_copy_bits(PutBitContext *pb, const uint8_t *src, int length);
+#endif
+
+/**
+ * Write up to 31 bits into a bitstream.
+ * Use put_bits32 to write 32 bits.
+ *
+ * @param n     number of bits to write, must be <= 31 (asserted)
+ * @param value the bits to write; must fit in n bits (asserted)
+ */
+static inline void put_bits(PutBitContext *s, int n, unsigned int value)
+{
+    unsigned int bit_buf;
+    int bit_left;
+
+    av_assert2(n <= 31 && value < (1U << n));
+
+    /* Work on local copies; state is written back once at the end. */
+    bit_buf = s->bit_buf;
+    bit_left = s->bit_left;
+
+    /* XXX: optimize */
+#ifdef BITSTREAM_WRITER_LE
+    /* Little-endian: new bits are stacked above the accumulated ones. */
+    bit_buf |= value << (32 - bit_left);
+    if (n >= bit_left) {
+        /* bit_buf is full: flush 4 bytes, keep the overflowed high bits. */
+        if (3 < s->buf_end - s->buf_ptr) {
+            AV_WL32(s->buf_ptr, bit_buf);
+            s->buf_ptr += 4;
+        } else {
+            av_log(NULL, AV_LOG_ERROR, "Internal error, put_bits buffer too small\n");
+            av_assert2(0);
+        }
+        bit_buf = value >> bit_left;
+        bit_left += 32;
+    }
+    bit_left -= n;
+#else
+    if (n < bit_left) {
+        /* Everything fits: append the new bits below the existing ones. */
+        bit_buf = (bit_buf << n) | value;
+        bit_left -= n;
+    } else {
+        /* Fill bit_buf to 32 bits, flush 4 bytes (big-endian), then keep
+         * the remaining low bits of value for the next word. */
+        bit_buf <<= bit_left;
+        bit_buf |= value >> (n - bit_left);
+        if (3 < s->buf_end - s->buf_ptr) {
+            AV_WB32(s->buf_ptr, bit_buf);
+            s->buf_ptr += 4;
+        } else {
+            av_log(NULL, AV_LOG_ERROR, "Internal error, put_bits buffer too small\n");
+            av_assert2(0);
+        }
+        bit_left += 32 - n;
+        bit_buf = value;
+    }
+#endif
+
+    s->bit_buf = bit_buf;
+    s->bit_left = bit_left;
+}
+
+/**
+ * Write the n least significant bits of a signed value.
+ * av_mod_uintp2() masks the value to its low n bits (two's complement).
+ */
+static inline void put_sbits(PutBitContext *pb, int n, int32_t value)
+{
+    av_assert2(n >= 0 && n <= 31);
+
+    put_bits(pb, n, av_mod_uintp2(value, n));
+}
+
+/**
+ * Write exactly 32 bits into a bitstream.
+ * put_bits() itself is limited to 31 bits, so the value is emitted as
+ * two 16-bit halves in the order required by the writer's endianness.
+ */
+static void av_unused put_bits32(PutBitContext *s, uint32_t value)
+{
+    unsigned lower = value & 0xffff;
+    unsigned upper = value >> 16;
+
+#ifdef BITSTREAM_WRITER_LE
+    put_bits(s, 16, lower);
+    put_bits(s, 16, upper);
+#else
+    put_bits(s, 16, upper);
+    put_bits(s, 16, lower);
+#endif
+}
+
+/**
+ * Return the pointer to the byte where the bitstream writer will put
+ * the next bit.  Note: bits pending in bit_buf are not yet visible at
+ * this address until flush_put_bits() is called.
+ */
+static inline uint8_t *put_bits_ptr(PutBitContext *s)
+{
+    return s->buf_ptr;
+}
+
+/**
+ * Skip the given number of bytes.
+ * PutBitContext must be flushed & aligned to a byte boundary before calling this.
+ */
+static inline void skip_put_bytes(PutBitContext *s, int n)
+{
+    av_assert2((put_bits_count(s) & 7) == 0);
+    av_assert2(s->bit_left == 32);   /* i.e. no bits pending in bit_buf */
+    av_assert0(n <= s->buf_end - s->buf_ptr);
+    s->buf_ptr += n;
+}
+
+/**
+ * Skip the given number of bits.
+ * Must only be used if the actual values in the bitstream do not matter.
+ * If n is 0 the behavior is undefined.
+ */
+static inline void skip_put_bits(PutBitContext *s, int n)
+{
+    s->bit_left -= n;
+    /* bit_left may now be negative: each full 32-bit word crossed makes
+     * bit_left >> 5 one more negative, so the subtraction below advances
+     * buf_ptr by 4 bytes per word; the &= keeps the in-word remainder. */
+    s->buf_ptr -= 4 * (s->bit_left >> 5);
+    s->bit_left &= 31;
+}
+
+/**
+ * Change the end of the buffer.
+ *
+ * @param size the new size in bytes of the buffer where to put bits;
+ *             bounded so that 8*size (+ slack) cannot overflow int
+ *             (INT_MAX comes from <limits.h>)
+ */
+static inline void set_put_bits_buffer_size(PutBitContext *s, int size)
+{
+    av_assert0(size <= INT_MAX/8 - 32);
+    s->buf_end = s->buf + size;
+    s->size_in_bits = 8*size;
+}
+
+#endif /* AVCODEC_PUT_BITS_H */
diff --git a/media/ffvpx/libavcodec/qpeldsp.h b/media/ffvpx/libavcodec/qpeldsp.h
new file mode 100644
index 000000000..91019eda9
--- /dev/null
+++ b/media/ffvpx/libavcodec/qpeldsp.h
@@ -0,0 +1,83 @@
+/*
+ * quarterpel DSP functions
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * quarterpel DSP functions
+ */
+
+#ifndef AVCODEC_QPELDSP_H
+#define AVCODEC_QPELDSP_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+void ff_put_pixels8x8_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_pixels8x8_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_put_pixels16x16_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+void ff_avg_pixels16x16_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride);
+
+void ff_put_pixels8_l2_8(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
+ int dst_stride, int src_stride1, int src_stride2,
+ int h);
+
+/* Declare the put / put_no_rnd / avg prototype triple for one old-style
+ * quarterpel motion-compensation function named 'name'. */
+#define DEF_OLD_QPEL(name) \
+void ff_put_ ## name(uint8_t *dst /* align width (8 or 16) */, \
+                     const uint8_t *src /* align 1 */, \
+                     ptrdiff_t stride); \
+void ff_put_no_rnd_ ## name(uint8_t *dst /* align width (8 or 16) */, \
+                            const uint8_t *src /* align 1 */, \
+                            ptrdiff_t stride); \
+void ff_avg_ ## name(uint8_t *dst /* align width (8 or 16) */, \
+                     const uint8_t *src /* align 1 */, \
+                     ptrdiff_t stride);
+
+DEF_OLD_QPEL(qpel16_mc11_old_c)
+DEF_OLD_QPEL(qpel16_mc31_old_c)
+DEF_OLD_QPEL(qpel16_mc12_old_c)
+DEF_OLD_QPEL(qpel16_mc32_old_c)
+DEF_OLD_QPEL(qpel16_mc13_old_c)
+DEF_OLD_QPEL(qpel16_mc33_old_c)
+DEF_OLD_QPEL(qpel8_mc11_old_c)
+DEF_OLD_QPEL(qpel8_mc31_old_c)
+DEF_OLD_QPEL(qpel8_mc12_old_c)
+DEF_OLD_QPEL(qpel8_mc32_old_c)
+DEF_OLD_QPEL(qpel8_mc13_old_c)
+DEF_OLD_QPEL(qpel8_mc33_old_c)
+
+typedef void (*qpel_mc_func)(uint8_t *dst /* align width (8 or 16) */,
+ const uint8_t *src /* align 1 */,
+ ptrdiff_t stride);
+
+/**
+ * quarterpel DSP context.
+ * Each table holds one motion-compensation function per quarter-pel
+ * sub-position (16 entries); the first index presumably selects the
+ * block size (16x16 vs 8x8) — confirm against ff_qpeldsp_init().
+ */
+typedef struct QpelDSPContext {
+    qpel_mc_func put_qpel_pixels_tab[2][16];
+    qpel_mc_func avg_qpel_pixels_tab[2][16];
+    qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
+} QpelDSPContext;
+
+void ff_qpeldsp_init(QpelDSPContext *c);
+
+void ff_qpeldsp_init_x86(QpelDSPContext *c);
+void ff_qpeldsp_init_mips(QpelDSPContext *c);
+
+#endif /* AVCODEC_QPELDSP_H */
diff --git a/media/ffvpx/libavcodec/qsv_api.c b/media/ffvpx/libavcodec/qsv_api.c
new file mode 100644
index 000000000..327ff7d81
--- /dev/null
+++ b/media/ffvpx/libavcodec/qsv_api.c
@@ -0,0 +1,42 @@
+/*
+ * Intel MediaSDK QSV public API functions
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include <stddef.h>
+
+#include "libavutil/mem.h"
+
+#if CONFIG_QSV
+#include "qsv.h"
+
+/**
+ * Allocate a zero-initialized AVQSVContext.
+ * The caller owns the returned memory (allocated with av_mallocz).
+ */
+AVQSVContext *av_qsv_alloc_context(void)
+{
+    return av_mallocz(sizeof(AVQSVContext));
+}
+#else
+
+/* Stub used when QSV support is compiled out: keeps the public symbol
+ * available but always signals failure by returning NULL. */
+struct AVQSVContext *av_qsv_alloc_context(void);
+
+struct AVQSVContext *av_qsv_alloc_context(void)
+{
+    return NULL;
+}
+#endif
diff --git a/media/ffvpx/libavcodec/ratecontrol.h b/media/ffvpx/libavcodec/ratecontrol.h
new file mode 100644
index 000000000..c15f9e258
--- /dev/null
+++ b/media/ffvpx/libavcodec/ratecontrol.h
@@ -0,0 +1,103 @@
+/*
+ * Ratecontrol
+ * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_RATECONTROL_H
+#define AVCODEC_RATECONTROL_H
+
+/**
+ * @file
+ * ratecontrol header.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include "libavutil/eval.h"
+
+/* Adaptive predictor state used by rate control; the update rules live
+ * in ratecontrol.c (not visible in this header). */
+typedef struct Predictor{
+    double coeff;
+    double count;
+    double decay;
+} Predictor;
+
+/**
+ * Per-frame statistics record used by rate control (one entry per
+ * picture; see ff_write_pass1_stats() / 2-pass prototypes below).
+ */
+typedef struct RateControlEntry{
+    int pict_type;
+    float qscale;
+    int mv_bits;
+    int i_tex_bits;
+    int p_tex_bits;
+    int misc_bits;
+    int header_bits;
+    uint64_t expected_bits;
+    int new_pict_type;
+    float new_qscale;
+    int64_t mc_mb_var_sum;
+    int64_t mb_var_sum;
+    int i_count;
+    int skip_count;
+    int f_code;
+    int b_code;
+}RateControlEntry;
+
+/**
+ * rate control context.
+ */
+typedef struct RateControlContext{
+    int num_entries;              ///< number of RateControlEntries
+    RateControlEntry *entry;      ///< array of num_entries per-frame records
+    double buffer_index;          ///< amount of bits in the video/audio buffer
+    Predictor pred[5];            ///< presumably one predictor per pict type (same [5] indexing as last_qscale_for) — confirm
+    double short_term_qsum;       ///< sum of recent qscales
+    double short_term_qcount;     ///< count of recent qscales
+    double pass1_rc_eq_output_sum;///< sum of the output of the rc equation, this is used for normalization
+    double pass1_wanted_bits;     ///< bits which should have been output by the pass1 code (including complexity init)
+    double last_qscale;
+    double last_qscale_for[5];    ///< last qscale for a specific pict type, used for max_diff & ipb factor stuff
+    int64_t last_mc_mb_var_sum;
+    int64_t last_mb_var_sum;
+    uint64_t i_cplx_sum[5];
+    uint64_t p_cplx_sum[5];
+    uint64_t mv_bits_sum[5];
+    uint64_t qscale_sum[5];
+    int frame_count[5];
+    int last_non_b_pict_type;
+
+    void *non_lavc_opaque;        ///< context for non lavc rc code (for example xvid)
+    float dry_run_qscale;         ///< for xvid rc
+    int last_picture_number;      ///< for xvid rc
+    AVExpr * rc_eq_eval;          ///< parsed rc equation (AVExpr from libavutil/eval.h)
+}RateControlContext;
+
+struct MpegEncContext;
+
+/* rate control */
+int ff_rate_control_init(struct MpegEncContext *s);
+float ff_rate_estimate_qscale(struct MpegEncContext *s, int dry_run);
+void ff_write_pass1_stats(struct MpegEncContext *s);
+void ff_rate_control_uninit(struct MpegEncContext *s);
+int ff_vbv_update(struct MpegEncContext *s, int frame_size);
+void ff_get_2pass_fcode(struct MpegEncContext *s);
+
+int ff_xvid_rate_control_init(struct MpegEncContext *s);
+void ff_xvid_rate_control_uninit(struct MpegEncContext *s);
+float ff_xvid_rate_estimate_qscale(struct MpegEncContext *s, int dry_run);
+
+#endif /* AVCODEC_RATECONTROL_H */
diff --git a/media/ffvpx/libavcodec/raw.c b/media/ffvpx/libavcodec/raw.c
new file mode 100644
index 000000000..bfa2537b5
--- /dev/null
+++ b/media/ffvpx/libavcodec/raw.c
@@ -0,0 +1,308 @@
+/*
+ * Raw Video Codec
+ * Copyright (c) 2001 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Raw Video Codec
+ */
+
+#include "avcodec.h"
+#include "raw.h"
+#include "libavutil/common.h"
+
+const PixelFormatTag ff_raw_pix_fmt_tags[] = {
+ { AV_PIX_FMT_YUV420P, MKTAG('I', '4', '2', '0') }, /* Planar formats */
+ { AV_PIX_FMT_YUV420P, MKTAG('I', 'Y', 'U', 'V') },
+ { AV_PIX_FMT_YUV420P, MKTAG('Y', 'V', '1', '2') },
+ { AV_PIX_FMT_YUV410P, MKTAG('Y', 'U', 'V', '9') },
+ { AV_PIX_FMT_YUV410P, MKTAG('Y', 'V', 'U', '9') },
+ { AV_PIX_FMT_YUV411P, MKTAG('Y', '4', '1', 'B') },
+ { AV_PIX_FMT_YUV422P, MKTAG('Y', '4', '2', 'B') },
+ { AV_PIX_FMT_YUV422P, MKTAG('P', '4', '2', '2') },
+ { AV_PIX_FMT_YUV422P, MKTAG('Y', 'V', '1', '6') },
+ /* yuvjXXX formats are deprecated hacks specific to libav*,
+ they are identical to yuvXXX */
+ { AV_PIX_FMT_YUVJ420P, MKTAG('I', '4', '2', '0') }, /* Planar formats */
+ { AV_PIX_FMT_YUVJ420P, MKTAG('I', 'Y', 'U', 'V') },
+ { AV_PIX_FMT_YUVJ420P, MKTAG('Y', 'V', '1', '2') },
+ { AV_PIX_FMT_YUVJ422P, MKTAG('Y', '4', '2', 'B') },
+ { AV_PIX_FMT_YUVJ422P, MKTAG('P', '4', '2', '2') },
+ { AV_PIX_FMT_GRAY8, MKTAG('Y', '8', '0', '0') },
+ { AV_PIX_FMT_GRAY8, MKTAG('Y', '8', ' ', ' ') },
+
+ { AV_PIX_FMT_YUYV422, MKTAG('Y', 'U', 'Y', '2') }, /* Packed formats */
+ { AV_PIX_FMT_YUYV422, MKTAG('Y', '4', '2', '2') },
+ { AV_PIX_FMT_YUYV422, MKTAG('V', '4', '2', '2') },
+ { AV_PIX_FMT_YUYV422, MKTAG('V', 'Y', 'U', 'Y') },
+ { AV_PIX_FMT_YUYV422, MKTAG('Y', 'U', 'N', 'V') },
+ { AV_PIX_FMT_YUYV422, MKTAG('Y', 'U', 'Y', 'V') },
+ { AV_PIX_FMT_YVYU422, MKTAG('Y', 'V', 'Y', 'U') }, /* Philips */
+ { AV_PIX_FMT_UYVY422, MKTAG('U', 'Y', 'V', 'Y') },
+ { AV_PIX_FMT_UYVY422, MKTAG('H', 'D', 'Y', 'C') },
+ { AV_PIX_FMT_UYVY422, MKTAG('U', 'Y', 'N', 'V') },
+ { AV_PIX_FMT_UYVY422, MKTAG('U', 'Y', 'N', 'Y') },
+ { AV_PIX_FMT_UYVY422, MKTAG('u', 'y', 'v', '1') },
+ { AV_PIX_FMT_UYVY422, MKTAG('2', 'V', 'u', '1') },
+ { AV_PIX_FMT_UYVY422, MKTAG('A', 'V', 'R', 'n') }, /* Avid AVI Codec 1:1 */
+ { AV_PIX_FMT_UYVY422, MKTAG('A', 'V', '1', 'x') }, /* Avid 1:1x */
+ { AV_PIX_FMT_UYVY422, MKTAG('A', 'V', 'u', 'p') },
+ { AV_PIX_FMT_UYVY422, MKTAG('V', 'D', 'T', 'Z') }, /* SoftLab-NSK VideoTizer */
+ { AV_PIX_FMT_UYVY422, MKTAG('a', 'u', 'v', '2') },
+ { AV_PIX_FMT_UYVY422, MKTAG('c', 'y', 'u', 'v') }, /* CYUV is also Creative YUV */
+ { AV_PIX_FMT_UYYVYY411, MKTAG('Y', '4', '1', '1') },
+ { AV_PIX_FMT_GRAY8, MKTAG('G', 'R', 'E', 'Y') },
+ { AV_PIX_FMT_NV12, MKTAG('N', 'V', '1', '2') },
+ { AV_PIX_FMT_NV21, MKTAG('N', 'V', '2', '1') },
+
+ /* nut */
+ { AV_PIX_FMT_RGB555LE, MKTAG('R', 'G', 'B', 15) },
+ { AV_PIX_FMT_BGR555LE, MKTAG('B', 'G', 'R', 15) },
+ { AV_PIX_FMT_RGB565LE, MKTAG('R', 'G', 'B', 16) },
+ { AV_PIX_FMT_BGR565LE, MKTAG('B', 'G', 'R', 16) },
+ { AV_PIX_FMT_RGB555BE, MKTAG(15 , 'B', 'G', 'R') },
+ { AV_PIX_FMT_BGR555BE, MKTAG(15 , 'R', 'G', 'B') },
+ { AV_PIX_FMT_RGB565BE, MKTAG(16 , 'B', 'G', 'R') },
+ { AV_PIX_FMT_BGR565BE, MKTAG(16 , 'R', 'G', 'B') },
+ { AV_PIX_FMT_RGB444LE, MKTAG('R', 'G', 'B', 12) },
+ { AV_PIX_FMT_BGR444LE, MKTAG('B', 'G', 'R', 12) },
+ { AV_PIX_FMT_RGB444BE, MKTAG(12 , 'B', 'G', 'R') },
+ { AV_PIX_FMT_BGR444BE, MKTAG(12 , 'R', 'G', 'B') },
+ { AV_PIX_FMT_RGBA64LE, MKTAG('R', 'B', 'A', 64 ) },
+ { AV_PIX_FMT_BGRA64LE, MKTAG('B', 'R', 'A', 64 ) },
+ { AV_PIX_FMT_RGBA64BE, MKTAG(64 , 'R', 'B', 'A') },
+ { AV_PIX_FMT_BGRA64BE, MKTAG(64 , 'B', 'R', 'A') },
+ { AV_PIX_FMT_RGBA, MKTAG('R', 'G', 'B', 'A') },
+ { AV_PIX_FMT_RGB0, MKTAG('R', 'G', 'B', 0 ) },
+ { AV_PIX_FMT_BGRA, MKTAG('B', 'G', 'R', 'A') },
+ { AV_PIX_FMT_BGR0, MKTAG('B', 'G', 'R', 0 ) },
+ { AV_PIX_FMT_ABGR, MKTAG('A', 'B', 'G', 'R') },
+ { AV_PIX_FMT_0BGR, MKTAG( 0 , 'B', 'G', 'R') },
+ { AV_PIX_FMT_ARGB, MKTAG('A', 'R', 'G', 'B') },
+ { AV_PIX_FMT_0RGB, MKTAG( 0 , 'R', 'G', 'B') },
+ { AV_PIX_FMT_RGB24, MKTAG('R', 'G', 'B', 24 ) },
+ { AV_PIX_FMT_BGR24, MKTAG('B', 'G', 'R', 24 ) },
+ { AV_PIX_FMT_YUV411P, MKTAG('4', '1', '1', 'P') },
+ { AV_PIX_FMT_YUV422P, MKTAG('4', '2', '2', 'P') },
+ { AV_PIX_FMT_YUVJ422P, MKTAG('4', '2', '2', 'P') },
+ { AV_PIX_FMT_YUV440P, MKTAG('4', '4', '0', 'P') },
+ { AV_PIX_FMT_YUVJ440P, MKTAG('4', '4', '0', 'P') },
+ { AV_PIX_FMT_YUV444P, MKTAG('4', '4', '4', 'P') },
+ { AV_PIX_FMT_YUVJ444P, MKTAG('4', '4', '4', 'P') },
+ { AV_PIX_FMT_MONOWHITE,MKTAG('B', '1', 'W', '0') },
+ { AV_PIX_FMT_MONOBLACK,MKTAG('B', '0', 'W', '1') },
+ { AV_PIX_FMT_BGR8, MKTAG('B', 'G', 'R', 8 ) },
+ { AV_PIX_FMT_RGB8, MKTAG('R', 'G', 'B', 8 ) },
+ { AV_PIX_FMT_BGR4, MKTAG('B', 'G', 'R', 4 ) },
+ { AV_PIX_FMT_RGB4, MKTAG('R', 'G', 'B', 4 ) },
+ { AV_PIX_FMT_RGB4_BYTE,MKTAG('B', '4', 'B', 'Y') },
+ { AV_PIX_FMT_BGR4_BYTE,MKTAG('R', '4', 'B', 'Y') },
+ { AV_PIX_FMT_RGB48LE, MKTAG('R', 'G', 'B', 48 ) },
+ { AV_PIX_FMT_RGB48BE, MKTAG( 48, 'R', 'G', 'B') },
+ { AV_PIX_FMT_BGR48LE, MKTAG('B', 'G', 'R', 48 ) },
+ { AV_PIX_FMT_BGR48BE, MKTAG( 48, 'B', 'G', 'R') },
+ { AV_PIX_FMT_GRAY16LE, MKTAG('Y', '1', 0 , 16 ) },
+ { AV_PIX_FMT_GRAY16BE, MKTAG(16 , 0 , '1', 'Y') },
+ { AV_PIX_FMT_YUV420P9LE, MKTAG('Y', '3', 11 , 9 ) },
+ { AV_PIX_FMT_YUV420P9BE, MKTAG( 9 , 11 , '3', 'Y') },
+ { AV_PIX_FMT_YUV422P9LE, MKTAG('Y', '3', 10 , 9 ) },
+ { AV_PIX_FMT_YUV422P9BE, MKTAG( 9 , 10 , '3', 'Y') },
+ { AV_PIX_FMT_YUV444P9LE, MKTAG('Y', '3', 0 , 9 ) },
+ { AV_PIX_FMT_YUV444P9BE, MKTAG( 9 , 0 , '3', 'Y') },
+ { AV_PIX_FMT_YUV420P10LE, MKTAG('Y', '3', 11 , 10 ) },
+ { AV_PIX_FMT_YUV420P10BE, MKTAG(10 , 11 , '3', 'Y') },
+ { AV_PIX_FMT_YUV422P10LE, MKTAG('Y', '3', 10 , 10 ) },
+ { AV_PIX_FMT_YUV422P10BE, MKTAG(10 , 10 , '3', 'Y') },
+ { AV_PIX_FMT_YUV444P10LE, MKTAG('Y', '3', 0 , 10 ) },
+ { AV_PIX_FMT_YUV444P10BE, MKTAG(10 , 0 , '3', 'Y') },
+ { AV_PIX_FMT_YUV420P12LE, MKTAG('Y', '3', 11 , 12 ) },
+ { AV_PIX_FMT_YUV420P12BE, MKTAG(12 , 11 , '3', 'Y') },
+ { AV_PIX_FMT_YUV422P12LE, MKTAG('Y', '3', 10 , 12 ) },
+ { AV_PIX_FMT_YUV422P12BE, MKTAG(12 , 10 , '3', 'Y') },
+ { AV_PIX_FMT_YUV444P12LE, MKTAG('Y', '3', 0 , 12 ) },
+ { AV_PIX_FMT_YUV444P12BE, MKTAG(12 , 0 , '3', 'Y') },
+ { AV_PIX_FMT_YUV420P14LE, MKTAG('Y', '3', 11 , 14 ) },
+ { AV_PIX_FMT_YUV420P14BE, MKTAG(14 , 11 , '3', 'Y') },
+ { AV_PIX_FMT_YUV422P14LE, MKTAG('Y', '3', 10 , 14 ) },
+ { AV_PIX_FMT_YUV422P14BE, MKTAG(14 , 10 , '3', 'Y') },
+ { AV_PIX_FMT_YUV444P14LE, MKTAG('Y', '3', 0 , 14 ) },
+ { AV_PIX_FMT_YUV444P14BE, MKTAG(14 , 0 , '3', 'Y') },
+ { AV_PIX_FMT_YUV420P16LE, MKTAG('Y', '3', 11 , 16 ) },
+ { AV_PIX_FMT_YUV420P16BE, MKTAG(16 , 11 , '3', 'Y') },
+ { AV_PIX_FMT_YUV422P16LE, MKTAG('Y', '3', 10 , 16 ) },
+ { AV_PIX_FMT_YUV422P16BE, MKTAG(16 , 10 , '3', 'Y') },
+ { AV_PIX_FMT_YUV444P16LE, MKTAG('Y', '3', 0 , 16 ) },
+ { AV_PIX_FMT_YUV444P16BE, MKTAG(16 , 0 , '3', 'Y') },
+ { AV_PIX_FMT_YUVA420P, MKTAG('Y', '4', 11 , 8 ) },
+ { AV_PIX_FMT_YUVA422P, MKTAG('Y', '4', 10 , 8 ) },
+ { AV_PIX_FMT_YUVA444P, MKTAG('Y', '4', 0 , 8 ) },
+ { AV_PIX_FMT_YA8, MKTAG('Y', '2', 0 , 8 ) },
+ { AV_PIX_FMT_PAL8, MKTAG('P', 'A', 'L', 8 ) },
+
+ { AV_PIX_FMT_YUVA420P9LE, MKTAG('Y', '4', 11 , 9 ) },
+ { AV_PIX_FMT_YUVA420P9BE, MKTAG( 9 , 11 , '4', 'Y') },
+ { AV_PIX_FMT_YUVA422P9LE, MKTAG('Y', '4', 10 , 9 ) },
+ { AV_PIX_FMT_YUVA422P9BE, MKTAG( 9 , 10 , '4', 'Y') },
+ { AV_PIX_FMT_YUVA444P9LE, MKTAG('Y', '4', 0 , 9 ) },
+ { AV_PIX_FMT_YUVA444P9BE, MKTAG( 9 , 0 , '4', 'Y') },
+ { AV_PIX_FMT_YUVA420P10LE, MKTAG('Y', '4', 11 , 10 ) },
+ { AV_PIX_FMT_YUVA420P10BE, MKTAG(10 , 11 , '4', 'Y') },
+ { AV_PIX_FMT_YUVA422P10LE, MKTAG('Y', '4', 10 , 10 ) },
+ { AV_PIX_FMT_YUVA422P10BE, MKTAG(10 , 10 , '4', 'Y') },
+ { AV_PIX_FMT_YUVA444P10LE, MKTAG('Y', '4', 0 , 10 ) },
+ { AV_PIX_FMT_YUVA444P10BE, MKTAG(10 , 0 , '4', 'Y') },
+ { AV_PIX_FMT_YUVA420P16LE, MKTAG('Y', '4', 11 , 16 ) },
+ { AV_PIX_FMT_YUVA420P16BE, MKTAG(16 , 11 , '4', 'Y') },
+ { AV_PIX_FMT_YUVA422P16LE, MKTAG('Y', '4', 10 , 16 ) },
+ { AV_PIX_FMT_YUVA422P16BE, MKTAG(16 , 10 , '4', 'Y') },
+ { AV_PIX_FMT_YUVA444P16LE, MKTAG('Y', '4', 0 , 16 ) },
+ { AV_PIX_FMT_YUVA444P16BE, MKTAG(16 , 0 , '4', 'Y') },
+
+ { AV_PIX_FMT_GBRP, MKTAG('G', '3', 00 , 8 ) },
+ { AV_PIX_FMT_GBRP9LE, MKTAG('G', '3', 00 , 9 ) },
+ { AV_PIX_FMT_GBRP9BE, MKTAG( 9 , 00 , '3', 'G') },
+ { AV_PIX_FMT_GBRP10LE, MKTAG('G', '3', 00 , 10 ) },
+ { AV_PIX_FMT_GBRP10BE, MKTAG(10 , 00 , '3', 'G') },
+ { AV_PIX_FMT_GBRP12LE, MKTAG('G', '3', 00 , 12 ) },
+ { AV_PIX_FMT_GBRP12BE, MKTAG(12 , 00 , '3', 'G') },
+ { AV_PIX_FMT_GBRP14LE, MKTAG('G', '3', 00 , 14 ) },
+ { AV_PIX_FMT_GBRP14BE, MKTAG(14 , 00 , '3', 'G') },
+ { AV_PIX_FMT_GBRP16LE, MKTAG('G', '3', 00 , 16 ) },
+ { AV_PIX_FMT_GBRP16BE, MKTAG(16 , 00 , '3', 'G') },
+
+ { AV_PIX_FMT_XYZ12LE, MKTAG('X', 'Y', 'Z' , 36 ) },
+ { AV_PIX_FMT_XYZ12BE, MKTAG(36 , 'Z' , 'Y', 'X') },
+
+ { AV_PIX_FMT_BAYER_BGGR8, MKTAG(0xBA, 'B', 'G', 8 ) },
+ { AV_PIX_FMT_BAYER_BGGR16LE, MKTAG(0xBA, 'B', 'G', 16 ) },
+ { AV_PIX_FMT_BAYER_BGGR16BE, MKTAG(16 , 'G', 'B', 0xBA) },
+ { AV_PIX_FMT_BAYER_RGGB8, MKTAG(0xBA, 'R', 'G', 8 ) },
+ { AV_PIX_FMT_BAYER_RGGB16LE, MKTAG(0xBA, 'R', 'G', 16 ) },
+ { AV_PIX_FMT_BAYER_RGGB16BE, MKTAG(16 , 'G', 'R', 0xBA) },
+ { AV_PIX_FMT_BAYER_GBRG8, MKTAG(0xBA, 'G', 'B', 8 ) },
+ { AV_PIX_FMT_BAYER_GBRG16LE, MKTAG(0xBA, 'G', 'B', 16 ) },
+ { AV_PIX_FMT_BAYER_GBRG16BE, MKTAG(16, 'B', 'G', 0xBA) },
+ { AV_PIX_FMT_BAYER_GRBG8, MKTAG(0xBA, 'G', 'R', 8 ) },
+ { AV_PIX_FMT_BAYER_GRBG16LE, MKTAG(0xBA, 'G', 'R', 16 ) },
+ { AV_PIX_FMT_BAYER_GRBG16BE, MKTAG(16, 'R', 'G', 0xBA) },
+
+ /* quicktime */
+ { AV_PIX_FMT_YUV420P, MKTAG('R', '4', '2', '0') }, /* Radius DV YUV PAL */
+ { AV_PIX_FMT_YUV411P, MKTAG('R', '4', '1', '1') }, /* Radius DV YUV NTSC */
+ { AV_PIX_FMT_UYVY422, MKTAG('2', 'v', 'u', 'y') },
+ { AV_PIX_FMT_UYVY422, MKTAG('2', 'V', 'u', 'y') },
+ { AV_PIX_FMT_UYVY422, MKTAG('A', 'V', 'U', 'I') }, /* FIXME merge both fields */
+ { AV_PIX_FMT_UYVY422, MKTAG('b', 'x', 'y', 'v') },
+ { AV_PIX_FMT_YUYV422, MKTAG('y', 'u', 'v', '2') },
+ { AV_PIX_FMT_YUYV422, MKTAG('y', 'u', 'v', 's') },
+ { AV_PIX_FMT_YUYV422, MKTAG('D', 'V', 'O', 'O') }, /* Digital Voodoo SD 8 Bit */
+ { AV_PIX_FMT_RGB555LE,MKTAG('L', '5', '5', '5') },
+ { AV_PIX_FMT_RGB565LE,MKTAG('L', '5', '6', '5') },
+ { AV_PIX_FMT_RGB565BE,MKTAG('B', '5', '6', '5') },
+ { AV_PIX_FMT_BGR24, MKTAG('2', '4', 'B', 'G') },
+ { AV_PIX_FMT_BGR24, MKTAG('b', 'x', 'b', 'g') },
+ { AV_PIX_FMT_BGRA, MKTAG('B', 'G', 'R', 'A') },
+ { AV_PIX_FMT_RGBA, MKTAG('R', 'G', 'B', 'A') },
+ { AV_PIX_FMT_RGB24, MKTAG('b', 'x', 'r', 'g') },
+ { AV_PIX_FMT_ABGR, MKTAG('A', 'B', 'G', 'R') },
+ { AV_PIX_FMT_GRAY16BE,MKTAG('b', '1', '6', 'g') },
+ { AV_PIX_FMT_RGB48BE, MKTAG('b', '4', '8', 'r') },
+
+ /* vlc */
+ { AV_PIX_FMT_YUV410P, MKTAG('I', '4', '1', '0') },
+ { AV_PIX_FMT_YUV411P, MKTAG('I', '4', '1', '1') },
+ { AV_PIX_FMT_YUV422P, MKTAG('I', '4', '2', '2') },
+ { AV_PIX_FMT_YUV440P, MKTAG('I', '4', '4', '0') },
+ { AV_PIX_FMT_YUV444P, MKTAG('I', '4', '4', '4') },
+ { AV_PIX_FMT_YUVJ420P, MKTAG('J', '4', '2', '0') },
+ { AV_PIX_FMT_YUVJ422P, MKTAG('J', '4', '2', '2') },
+ { AV_PIX_FMT_YUVJ440P, MKTAG('J', '4', '4', '0') },
+ { AV_PIX_FMT_YUVJ444P, MKTAG('J', '4', '4', '4') },
+ { AV_PIX_FMT_YUVA444P, MKTAG('Y', 'U', 'V', 'A') },
+ { AV_PIX_FMT_YUVA420P, MKTAG('I', '4', '0', 'A') },
+ { AV_PIX_FMT_YUVA422P, MKTAG('I', '4', '2', 'A') },
+ { AV_PIX_FMT_RGB8, MKTAG('R', 'G', 'B', '2') },
+ { AV_PIX_FMT_RGB555LE, MKTAG('R', 'V', '1', '5') },
+ { AV_PIX_FMT_RGB565LE, MKTAG('R', 'V', '1', '6') },
+ { AV_PIX_FMT_BGR24, MKTAG('R', 'V', '2', '4') },
+ { AV_PIX_FMT_BGR0, MKTAG('R', 'V', '3', '2') },
+ { AV_PIX_FMT_RGBA, MKTAG('A', 'V', '3', '2') },
+ { AV_PIX_FMT_YUV420P9LE, MKTAG('I', '0', '9', 'L') },
+ { AV_PIX_FMT_YUV420P9BE, MKTAG('I', '0', '9', 'B') },
+ { AV_PIX_FMT_YUV422P9LE, MKTAG('I', '2', '9', 'L') },
+ { AV_PIX_FMT_YUV422P9BE, MKTAG('I', '2', '9', 'B') },
+ { AV_PIX_FMT_YUV444P9LE, MKTAG('I', '4', '9', 'L') },
+ { AV_PIX_FMT_YUV444P9BE, MKTAG('I', '4', '9', 'B') },
+ { AV_PIX_FMT_YUV420P10LE, MKTAG('I', '0', 'A', 'L') },
+ { AV_PIX_FMT_YUV420P10BE, MKTAG('I', '0', 'A', 'B') },
+ { AV_PIX_FMT_YUV422P10LE, MKTAG('I', '2', 'A', 'L') },
+ { AV_PIX_FMT_YUV422P10BE, MKTAG('I', '2', 'A', 'B') },
+ { AV_PIX_FMT_YUV444P10LE, MKTAG('I', '4', 'A', 'L') },
+ { AV_PIX_FMT_YUV444P10BE, MKTAG('I', '4', 'A', 'B') },
+ { AV_PIX_FMT_YUV444P16LE, MKTAG('I', '4', 'F', 'L') },
+ { AV_PIX_FMT_YUV444P16BE, MKTAG('I', '4', 'F', 'B') },
+
+ /* special */
+ { AV_PIX_FMT_RGB565LE,MKTAG( 3 , 0 , 0 , 0 ) }, /* flipped RGB565LE */
+ { AV_PIX_FMT_YUV444P, MKTAG('Y', 'V', '2', '4') }, /* YUV444P, swapped UV */
+
+ { AV_PIX_FMT_NONE, 0 },
+};
+
+/* avpriv_ accessor exposing the ff_raw_pix_fmt_tags table across library
+ * boundaries (see the matching comment in raw.h). */
+const struct PixelFormatTag *avpriv_get_raw_pix_fmt_tags(void)
+{
+    return ff_raw_pix_fmt_tags;
+}
+
+/**
+ * Look up the first FOURCC registered for a pixel format.
+ * The table is scanned in order and ends with an AV_PIX_FMT_NONE (< 0)
+ * sentinel; 0 is returned when no tag is registered for fmt.
+ */
+unsigned int avcodec_pix_fmt_to_codec_tag(enum AVPixelFormat fmt)
+{
+    const PixelFormatTag *cursor;
+
+    for (cursor = ff_raw_pix_fmt_tags; cursor->pix_fmt >= 0; cursor++)
+        if (cursor->pix_fmt == fmt)
+            return cursor->fourcc;
+
+    return 0;
+}
+
+/* Bits-per-sample -> pixel format lookup, AVI flavour (the name suggests
+ * it maps the raw-video bit count from AVI headers — confirm at the call
+ * sites).  Here the .fourcc member holds a bit depth, not a FOURCC;
+ * terminated by the {AV_PIX_FMT_NONE, 0} sentinel. */
+const PixelFormatTag avpriv_pix_fmt_bps_avi[] = {
+    { AV_PIX_FMT_PAL8, 1 },
+    { AV_PIX_FMT_PAL8, 2 },
+    { AV_PIX_FMT_PAL8, 4 },
+    { AV_PIX_FMT_PAL8, 8 },
+    { AV_PIX_FMT_RGB444LE, 12 },
+    { AV_PIX_FMT_RGB555LE, 15 },
+    { AV_PIX_FMT_RGB555LE, 16 },
+    { AV_PIX_FMT_BGR24, 24 },
+    { AV_PIX_FMT_BGRA, 32 },
+    { AV_PIX_FMT_NONE, 0 },
+};
+
+/* Bits-per-sample -> pixel format lookup, QuickTime/MOV flavour (note the
+ * big-endian RGB variants, unlike the AVI table above); the .fourcc
+ * member holds a bit depth, terminated by {AV_PIX_FMT_NONE, 0}. */
+const PixelFormatTag avpriv_pix_fmt_bps_mov[] = {
+    { AV_PIX_FMT_PAL8, 1 },
+    { AV_PIX_FMT_PAL8, 2 },
+    { AV_PIX_FMT_PAL8, 4 },
+    { AV_PIX_FMT_PAL8, 8 },
+    { AV_PIX_FMT_RGB555BE, 16 },
+    { AV_PIX_FMT_RGB24, 24 },
+    { AV_PIX_FMT_ARGB, 32 },
+    { AV_PIX_FMT_PAL8, 33 },
+    { AV_PIX_FMT_NONE, 0 },
+};
diff --git a/media/ffvpx/libavcodec/raw.h b/media/ffvpx/libavcodec/raw.h
new file mode 100644
index 000000000..24bf4cc55
--- /dev/null
+++ b/media/ffvpx/libavcodec/raw.h
@@ -0,0 +1,47 @@
+/*
+ * Raw Video Codec
+ * Copyright (c) 2001 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Raw Video Codec
+ */
+
+#ifndef AVCODEC_RAW_H
+#define AVCODEC_RAW_H
+
+#include "avcodec.h"
+#include "libavutil/internal.h"
+
+/* Pairing of a pixel format with an unsigned tag.  The tag is normally a
+ * FOURCC, but some tables (e.g. avpriv_pix_fmt_bps_avi/_mov in raw.c)
+ * store a bit depth in this field instead. */
+typedef struct PixelFormatTag {
+    enum AVPixelFormat pix_fmt;
+    unsigned int fourcc;
+} PixelFormatTag;
+
+extern const PixelFormatTag ff_raw_pix_fmt_tags[]; // exposed through avpriv_get_raw_pix_fmt_tags()
+
+const struct PixelFormatTag *avpriv_get_raw_pix_fmt_tags(void);
+
+enum AVPixelFormat avpriv_find_pix_fmt(const PixelFormatTag *tags, unsigned int fourcc);
+
+extern av_export const PixelFormatTag avpriv_pix_fmt_bps_avi[];
+extern av_export const PixelFormatTag avpriv_pix_fmt_bps_mov[];
+
+#endif /* AVCODEC_RAW_H */
diff --git a/media/ffvpx/libavcodec/rectangle.h b/media/ffvpx/libavcodec/rectangle.h
new file mode 100644
index 000000000..df7c18a4e
--- /dev/null
+++ b/media/ffvpx/libavcodec/rectangle.h
@@ -0,0 +1,124 @@
+/*
+ * rectangle filling function
+ * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * useful rectangle filling function
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#ifndef AVCODEC_RECTANGLE_H
+#define AVCODEC_RECTANGLE_H
+
+#include "config.h"
+#include "libavutil/common.h"
+#include "libavutil/avassert.h"
+
+/**
+ * fill a rectangle.
+ * @param h height of the rectangle, should be a constant
+ * @param w width of the rectangle, should be a constant
+ * @param size the size of val (1, 2 or 4), should be a constant
+ */
+static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
+    uint8_t *p= (uint8_t*)vp;
+    av_assert2(size==1 || size==2 || size==4);
+    /* w is in elements here; it is scaled to bytes just below, so the byte
+     * widths handled by the branches range from 2 up to 16 */
+    av_assert2(w<=4);
+
+    w *= size;
+    stride *= size;
+
+    /* destination must be aligned for the widest store used for this width */
+    av_assert2((((long)vp)&(FFMIN(w, 8<<(HAVE_NEON|ARCH_PPC|HAVE_MMX))-1)) == 0);
+    av_assert2((stride&(w-1))==0);
+    if(w==2){
+        /* replicate a 1-byte val so a single 16-bit store fills the row */
+        const uint16_t v= size==4 ? val : val*0x0101;
+        *(uint16_t*)(p + 0*stride)= v;
+        if(h==1) return;
+        *(uint16_t*)(p + 1*stride)= v;
+        if(h==2) return;
+        *(uint16_t*)(p + 2*stride)= v;
+        *(uint16_t*)(p + 3*stride)= v;
+    }else if(w==4){
+        /* replicate 1- or 2-byte val to a full 32-bit store */
+        const uint32_t v= size==4 ? val : size==2 ? val*0x00010001 : val*0x01010101;
+        *(uint32_t*)(p + 0*stride)= v;
+        if(h==1) return;
+        *(uint32_t*)(p + 1*stride)= v;
+        if(h==2) return;
+        *(uint32_t*)(p + 2*stride)= v;
+        *(uint32_t*)(p + 3*stride)= v;
+    }else if(w==8){
+    // gcc cannot optimize 64-bit math on x86_32
+#if HAVE_FAST_64BIT
+        /* w==8 bytes with w<=4 elements implies size==2 or size==4 */
+        const uint64_t v= size==2 ? val*0x0001000100010001ULL : val*0x0100000001ULL;
+        *(uint64_t*)(p + 0*stride)= v;
+        if(h==1) return;
+        *(uint64_t*)(p + 1*stride)= v;
+        if(h==2) return;
+        *(uint64_t*)(p + 2*stride)= v;
+        *(uint64_t*)(p + 3*stride)= v;
+    }else if(w==16){
+        /* w==16 bytes implies size==4, so only the 32->64 replication is needed */
+        const uint64_t v= val*0x0100000001ULL;
+        *(uint64_t*)(p + 0+0*stride)= v;
+        *(uint64_t*)(p + 8+0*stride)= v;
+        *(uint64_t*)(p + 0+1*stride)= v;
+        *(uint64_t*)(p + 8+1*stride)= v;
+        if(h==2) return;
+        *(uint64_t*)(p + 0+2*stride)= v;
+        *(uint64_t*)(p + 8+2*stride)= v;
+        *(uint64_t*)(p + 0+3*stride)= v;
+        *(uint64_t*)(p + 8+3*stride)= v;
+#else
+        /* 32-bit fallback for w==8 */
+        const uint32_t v= size==2 ? val*0x00010001 : val;
+        *(uint32_t*)(p + 0+0*stride)= v;
+        *(uint32_t*)(p + 4+0*stride)= v;
+        if(h==1) return;
+        *(uint32_t*)(p + 0+1*stride)= v;
+        *(uint32_t*)(p + 4+1*stride)= v;
+        if(h==2) return;
+        *(uint32_t*)(p + 0+2*stride)= v;
+        *(uint32_t*)(p + 4+2*stride)= v;
+        *(uint32_t*)(p + 0+3*stride)= v;
+        *(uint32_t*)(p + 4+3*stride)= v;
+    }else if(w==16){
+        /* w==16 bytes implies size==4, so val is stored unreplicated */
+        *(uint32_t*)(p + 0+0*stride)= val;
+        *(uint32_t*)(p + 4+0*stride)= val;
+        *(uint32_t*)(p + 8+0*stride)= val;
+        *(uint32_t*)(p +12+0*stride)= val;
+        *(uint32_t*)(p + 0+1*stride)= val;
+        *(uint32_t*)(p + 4+1*stride)= val;
+        *(uint32_t*)(p + 8+1*stride)= val;
+        *(uint32_t*)(p +12+1*stride)= val;
+        if(h==2) return;
+        *(uint32_t*)(p + 0+2*stride)= val;
+        *(uint32_t*)(p + 4+2*stride)= val;
+        *(uint32_t*)(p + 8+2*stride)= val;
+        *(uint32_t*)(p +12+2*stride)= val;
+        *(uint32_t*)(p + 0+3*stride)= val;
+        *(uint32_t*)(p + 4+3*stride)= val;
+        *(uint32_t*)(p + 8+3*stride)= val;
+        *(uint32_t*)(p +12+3*stride)= val;
+#endif
+    }else
+        av_assert2(0);
+    /* only h == 1, 2 or 4 is supported (1 and 2 returned early above) */
+    av_assert2(h==4);
+}
+
+#endif /* AVCODEC_RECTANGLE_H */
diff --git a/media/ffvpx/libavcodec/resample.c b/media/ffvpx/libavcodec/resample.c
new file mode 100644
index 000000000..4c5eb9f10
--- /dev/null
+++ b/media/ffvpx/libavcodec/resample.c
@@ -0,0 +1,439 @@
+/*
+ * samplerate conversion for both audio and video
+ * Copyright (c) 2000 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * samplerate conversion for both audio and video
+ */
+
+#include <string.h>
+
+#include "avcodec.h"
+#include "audioconvert.h"
+#include "libavutil/opt.h"
+#include "libavutil/mem.h"
+#include "libavutil/samplefmt.h"
+
+#if FF_API_AVCODEC_RESAMPLE
+FF_DISABLE_DEPRECATION_WARNINGS
+
+#define MAX_CHANNELS 8
+
+struct AVResampleContext;
+
+/* AVClass item_name callback: fixed name used for log messages. */
+static const char *context_to_name(void *ptr)
+{
+    return "audioresample";
+}
+
+/* Minimal AVClass (no options) so the context can be passed to av_log(). */
+static const AVOption options[] = {{NULL}};
+static const AVClass audioresample_context_class = {
+    "ReSampleContext", context_to_name, options, LIBAVUTIL_VERSION_INT
+};
+
+struct ReSampleContext {
+    struct AVResampleContext *resample_context;
+    short *temp[MAX_CHANNELS]; ///< unconsumed input samples carried over between audio_resample() calls
+    int temp_len; ///< number of samples in each temp[] buffer
+    float ratio; ///< output_rate / input_rate
+    /* channel convert */
+    int input_channels, output_channels, filter_channels; ///< filter_channels = min(in, out): channels actually resampled
+    AVAudioConvert *convert_ctx[2];
+    enum AVSampleFormat sample_fmt[2]; ///< input and output sample format
+    unsigned sample_size[2]; ///< size of one sample in sample_fmt
+    short *buffer[2]; ///< buffers used for conversion to S16
+    unsigned buffer_size[2]; ///< sizes of allocated buffers
+};
+
+/* n1: number of samples */
+/* Downmix interleaved stereo to mono by averaging each L/R pair: (l + r) >> 1. */
+static void stereo_to_mono(short *output, short *input, int n1)
+{
+    short *p, *q;
+    int n = n1;
+
+    p = input;
+    q = output;
+    /* unrolled: four output samples per iteration */
+    while (n >= 4) {
+        q[0] = (p[0] + p[1]) >> 1;
+        q[1] = (p[2] + p[3]) >> 1;
+        q[2] = (p[4] + p[5]) >> 1;
+        q[3] = (p[6] + p[7]) >> 1;
+        q += 4;
+        p += 8;
+        n -= 4;
+    }
+    /* tail: remaining 1-3 samples */
+    while (n > 0) {
+        q[0] = (p[0] + p[1]) >> 1;
+        q++;
+        p += 2;
+        n--;
+    }
+}
+
+/* n1: number of samples */
+/* Upmix mono to interleaved stereo by duplicating each sample into L and R. */
+static void mono_to_stereo(short *output, short *input, int n1)
+{
+    short *p, *q;
+    int n = n1;
+    int v;
+
+    p = input;
+    q = output;
+    /* unrolled: four input samples per iteration */
+    while (n >= 4) {
+        v = p[0]; q[0] = v; q[1] = v;
+        v = p[1]; q[2] = v; q[3] = v;
+        v = p[2]; q[4] = v; q[5] = v;
+        v = p[3]; q[6] = v; q[7] = v;
+        q += 8;
+        p += 4;
+        n -= 4;
+    }
+    /* tail: remaining 1-3 samples */
+    while (n > 0) {
+        v = p[0]; q[0] = v; q[1] = v;
+        q += 2;
+        p += 1;
+        n--;
+    }
+}
+
+/*
+5.1 to stereo input: [fl, fr, c, lfe, rl, rr]
+- Left = front_left + rear_gain * rear_left + center_gain * center
+- Right = front_right + rear_gain * rear_right + center_gain * center
+Where rear_gain is usually around 0.5-1.0 and
+ center_gain is almost always 0.7 (-3 dB)
+*/
+/* Downmix interleaved 5.1 input to PLANAR stereo output[0]/output[1],
+ * using fixed gains: rear 0.5, center 0.7 (-3 dB). The LFE channel is
+ * dropped and the result is clipped to int16. Advances the output[] pointers. */
+static void surround_to_stereo(short **output, short *input, int channels, int samples)
+{
+    int i;
+    short l, r;
+
+    for (i = 0; i < samples; i++) {
+        int fl,fr,c,rl,rr;
+        fl = input[0];
+        fr = input[1];
+        c = input[2];
+        // lfe = input[3];
+        rl = input[4];
+        rr = input[5];
+
+        l = av_clip_int16(fl + (0.5 * rl) + (0.7 * c));
+        r = av_clip_int16(fr + (0.5 * rr) + (0.7 * c));
+
+        /* output l & r. */
+        *output[0]++ = l;
+        *output[1]++ = r;
+
+        /* increment input. */
+        input += channels;
+    }
+}
+
+/* Split interleaved input into per-channel planar buffers.
+ * Note: advances the output[j] pointers as a side effect. */
+static void deinterleave(short **output, short *input, int channels, int samples)
+{
+    int i, j;
+
+    for (i = 0; i < samples; i++) {
+        for (j = 0; j < channels; j++) {
+            *output[j]++ = *input++;
+        }
+    }
+}
+
+/* Merge per-channel planar buffers into interleaved output.
+ * Note: advances the input[j] pointers as a side effect. */
+static void interleave(short *output, short **input, int channels, int samples)
+{
+    int i, j;
+
+    for (i = 0; i < samples; i++) {
+        for (j = 0; j < channels; j++) {
+            *output++ = *input[j]++;
+        }
+    }
+}
+
+/* Upmix stereo (input1 = L, input2 = R) to 6 interleaved channels in the
+ * order L, C, R, Ls, Rs, LFE: center is the average of L and R, the
+ * surround and LFE channels are silent. */
+static void ac3_5p1_mux(short *output, short *input1, short *input2, int n)
+{
+    int i;
+    short l, r;
+
+    for (i = 0; i < n; i++) {
+        l = *input1++;
+        r = *input2++;
+        *output++ = l; /* left */
+        *output++ = (l / 2) + (r / 2); /* center */
+        *output++ = r; /* right */
+        *output++ = 0; /* left surround */
+        *output++ = 0; /* right surround */
+        *output++ = 0; /* low freq */
+    }
+}
+
+#define SUPPORT_RESAMPLE(ch1, ch2, ch3, ch4, ch5, ch6, ch7, ch8) \
+ ch8<<7 | ch7<<6 | ch6<<5 | ch5<<4 | ch4<<3 | ch3<<2 | ch2<<1 | ch1<<0
+
+/* Row index = input channel count - 1; bit k set means resampling to
+ * k+1 output channels is supported for that input layout. */
+static const uint8_t supported_resampling[MAX_CHANNELS] = {
+    // output ch:    1  2  3  4  5  6  7  8
+    SUPPORT_RESAMPLE(1, 1, 0, 0, 0, 0, 0, 0), // 1 input channel
+    SUPPORT_RESAMPLE(1, 1, 0, 0, 0, 1, 0, 0), // 2 input channels
+    SUPPORT_RESAMPLE(0, 0, 1, 0, 0, 0, 0, 0), // 3 input channels
+    SUPPORT_RESAMPLE(0, 0, 0, 1, 0, 0, 0, 0), // 4 input channels
+    SUPPORT_RESAMPLE(0, 0, 0, 0, 1, 0, 0, 0), // 5 input channels
+    SUPPORT_RESAMPLE(0, 1, 0, 0, 0, 1, 0, 0), // 6 input channels
+    SUPPORT_RESAMPLE(0, 0, 0, 0, 0, 0, 1, 0), // 7 input channels
+    SUPPORT_RESAMPLE(0, 0, 0, 0, 0, 0, 0, 1), // 8 input channels
+};
+
+/* Allocate and initialize a ReSampleContext for the given channel counts,
+ * rates and sample formats. Returns NULL on unsupported layouts or
+ * allocation failure; the caller frees with audio_resample_close(). */
+ReSampleContext *av_audio_resample_init(int output_channels, int input_channels,
+                                        int output_rate, int input_rate,
+                                        enum AVSampleFormat sample_fmt_out,
+                                        enum AVSampleFormat sample_fmt_in,
+                                        int filter_length, int log2_phase_count,
+                                        int linear, double cutoff)
+{
+    ReSampleContext *s;
+
+    if (input_channels > MAX_CHANNELS) {
+        av_log(NULL, AV_LOG_ERROR,
+               "Resampling with input channels greater than %d is unsupported.\n",
+               MAX_CHANNELS);
+        return NULL;
+    }
+    /* reject channel combinations not listed in supported_resampling */
+    if (!(supported_resampling[input_channels-1] & (1<<(output_channels-1)))) {
+        int i;
+        av_log(NULL, AV_LOG_ERROR, "Unsupported audio resampling. Allowed "
+               "output channels for %d input channel%s", input_channels,
+               input_channels > 1 ? "s:" : ":");
+        for (i = 0; i < MAX_CHANNELS; i++)
+            if (supported_resampling[input_channels-1] & (1<<i))
+                av_log(NULL, AV_LOG_ERROR, " %d", i + 1);
+        av_log(NULL, AV_LOG_ERROR, "\n");
+        return NULL;
+    }
+
+    s = av_mallocz(sizeof(ReSampleContext));
+    if (!s) {
+        av_log(NULL, AV_LOG_ERROR, "Can't allocate memory for resample context.\n");
+        return NULL;
+    }
+
+    s->ratio = (float)output_rate / (float)input_rate;
+
+    s->input_channels = input_channels;
+    s->output_channels = output_channels;
+
+    /* only min(in, out) channels are run through the resampler;
+     * up/downmixing handles the rest */
+    s->filter_channels = s->input_channels;
+    if (s->output_channels < s->filter_channels)
+        s->filter_channels = s->output_channels;
+
+    s->sample_fmt[0] = sample_fmt_in;
+    s->sample_fmt[1] = sample_fmt_out;
+    s->sample_size[0] = av_get_bytes_per_sample(s->sample_fmt[0]);
+    s->sample_size[1] = av_get_bytes_per_sample(s->sample_fmt[1]);
+
+    /* the resampler itself works on S16; set up converters for other formats */
+    if (s->sample_fmt[0] != AV_SAMPLE_FMT_S16) {
+        if (!(s->convert_ctx[0] = av_audio_convert_alloc(AV_SAMPLE_FMT_S16, 1,
+                                                         s->sample_fmt[0], 1, NULL, 0))) {
+            av_log(s, AV_LOG_ERROR,
+                   "Cannot convert %s sample format to s16 sample format\n",
+                   av_get_sample_fmt_name(s->sample_fmt[0]));
+            av_free(s);
+            return NULL;
+        }
+    }
+
+    if (s->sample_fmt[1] != AV_SAMPLE_FMT_S16) {
+        if (!(s->convert_ctx[1] = av_audio_convert_alloc(s->sample_fmt[1], 1,
+                                                         AV_SAMPLE_FMT_S16, 1, NULL, 0))) {
+            av_log(s, AV_LOG_ERROR,
+                   "Cannot convert s16 sample format to %s sample format\n",
+                   av_get_sample_fmt_name(s->sample_fmt[1]));
+            av_audio_convert_free(s->convert_ctx[0]);
+            av_free(s);
+            return NULL;
+        }
+    }
+
+    s->resample_context = av_resample_init(output_rate, input_rate,
+                                           filter_length, log2_phase_count,
+                                           linear, cutoff);
+
+    /* NOTE(review): av_resample_init() returns NULL on failure; the write
+     * below would then dereference NULL — a check is missing here. */
+    *(const AVClass**)s->resample_context = &audioresample_context_class;
+
+    return s;
+}
+
+/* resample audio. 'nb_samples' is the number of input samples */
+/* XXX: optimize it ! */
+/* Resample nb_samples interleaved input samples into 'output'.
+ * Returns the number of output samples produced per channel, or 0 on error.
+ * Pipeline: convert input to S16 if needed -> channel up/downmix to
+ * filter_channels planar buffers -> per-channel FIR resample -> remix /
+ * interleave -> convert output format if needed. */
+int audio_resample(ReSampleContext *s, short *output, short *input, int nb_samples)
+{
+    int i, nb_samples1;
+    short *bufin[MAX_CHANNELS];
+    short *bufout[MAX_CHANNELS];
+    short *buftmp2[MAX_CHANNELS], *buftmp3[MAX_CHANNELS];
+    short *output_bak = NULL;
+    int lenout;
+
+    /* convert the input to S16 into s->buffer[0] if it is in another format */
+    if (s->sample_fmt[0] != AV_SAMPLE_FMT_S16) {
+        int istride[1] = { s->sample_size[0] };
+        int ostride[1] = { 2 };
+        const void *ibuf[1] = { input };
+        void *obuf[1];
+        unsigned input_size = nb_samples * s->input_channels * 2;
+
+        if (!s->buffer_size[0] || s->buffer_size[0] < input_size) {
+            av_free(s->buffer[0]);
+            s->buffer_size[0] = input_size;
+            s->buffer[0] = av_malloc(s->buffer_size[0]);
+            if (!s->buffer[0]) {
+                av_log(s->resample_context, AV_LOG_ERROR, "Could not allocate buffer\n");
+                return 0;
+            }
+        }
+
+        obuf[0] = s->buffer[0];
+
+        if (av_audio_convert(s->convert_ctx[0], obuf, ostride,
+                             ibuf, istride, nb_samples * s->input_channels) < 0) {
+            av_log(s->resample_context, AV_LOG_ERROR,
+                   "Audio sample format conversion failed\n");
+            return 0;
+        }
+
+        input = s->buffer[0];
+    }
+
+    /* worst-case output sample count (+16 safety margin) */
+    lenout= 2*s->output_channels*nb_samples * s->ratio + 16;
+
+    /* if output is not S16, resample into s->buffer[1] and convert at the end */
+    if (s->sample_fmt[1] != AV_SAMPLE_FMT_S16) {
+        int out_size = lenout * av_get_bytes_per_sample(s->sample_fmt[1]) *
+                       s->output_channels;
+        output_bak = output;
+
+        if (!s->buffer_size[1] || s->buffer_size[1] < out_size) {
+            av_free(s->buffer[1]);
+            s->buffer_size[1] = out_size;
+            s->buffer[1] = av_malloc(s->buffer_size[1]);
+            if (!s->buffer[1]) {
+                av_log(s->resample_context, AV_LOG_ERROR, "Could not allocate buffer\n");
+                return 0;
+            }
+        }
+
+        output = s->buffer[1];
+    }
+
+    /* XXX: move those malloc to resample init code */
+    for (i = 0; i < s->filter_channels; i++) {
+        bufin[i] = av_malloc_array((nb_samples + s->temp_len), sizeof(short));
+        bufout[i] = av_malloc_array(lenout, sizeof(short));
+
+        if (!bufin[i] || !bufout[i]) {
+            av_log(s->resample_context, AV_LOG_ERROR, "Could not allocate buffer\n");
+            nb_samples1 = 0;
+            /* NOTE(review): on this path the bufin[]/bufout[] entries for
+             * channels > i are still uninitialized, yet the 'fail' loop below
+             * frees all filter_channels entries — freeing garbage pointers. */
+            goto fail;
+        }
+
+        /* prepend leftover samples from the previous call */
+        memcpy(bufin[i], s->temp[i], s->temp_len * sizeof(short));
+        buftmp2[i] = bufin[i] + s->temp_len;
+    }
+
+    /* route input into buftmp2[] (resampler input) and pick buftmp3[]
+     * (resampler output), doing any channel up/downmix first */
+    if (s->input_channels == 2 && s->output_channels == 1) {
+        /* stereo -> mono: downmix before resampling, resample straight to output */
+        buftmp3[0] = output;
+        stereo_to_mono(buftmp2[0], input, nb_samples);
+    } else if (s->output_channels >= 2 && s->input_channels == 1) {
+        /* mono input: resample first, duplicate/upmix afterwards */
+        buftmp3[0] = bufout[0];
+        memcpy(buftmp2[0], input, nb_samples * sizeof(short));
+    } else if (s->input_channels == 6 && s->output_channels ==2) {
+        /* 5.1 -> stereo: downmix into two planar channels first */
+        buftmp3[0] = bufout[0];
+        buftmp3[1] = bufout[1];
+        surround_to_stereo(buftmp2, input, s->input_channels, nb_samples);
+    } else if (s->output_channels >= s->input_channels && s->input_channels >= 2) {
+        for (i = 0; i < s->input_channels; i++) {
+            buftmp3[i] = bufout[i];
+        }
+        deinterleave(buftmp2, input, s->input_channels, nb_samples);
+    } else {
+        /* same channel count, mono case: pass through */
+        buftmp3[0] = output;
+        memcpy(buftmp2[0], input, nb_samples * sizeof(short));
+    }
+
+    nb_samples += s->temp_len;
+
+    /* resample each channel */
+    nb_samples1 = 0; /* avoid warning */
+    for (i = 0; i < s->filter_channels; i++) {
+        int consumed;
+        int is_last = i + 1 == s->filter_channels;
+
+        nb_samples1 = av_resample(s->resample_context, buftmp3[i], bufin[i],
+                                  &consumed, nb_samples, lenout, is_last);
+        /* keep unconsumed input in s->temp[] for the next call */
+        s->temp_len = nb_samples - consumed;
+        s->temp[i] = av_realloc_array(s->temp[i], s->temp_len, sizeof(short));
+        memcpy(s->temp[i], bufin[i] + consumed, s->temp_len * sizeof(short));
+    }
+
+    /* post-resample channel remix / interleave */
+    if (s->output_channels == 2 && s->input_channels == 1) {
+        mono_to_stereo(output, buftmp3[0], nb_samples1);
+    } else if (s->output_channels == 6 && s->input_channels == 2) {
+        ac3_5p1_mux(output, buftmp3[0], buftmp3[1], nb_samples1);
+    } else if ((s->output_channels == s->input_channels && s->input_channels >= 2) ||
+               (s->output_channels == 2 && s->input_channels == 6)) {
+        interleave(output, buftmp3, s->output_channels, nb_samples1);
+    }
+
+    /* convert S16 intermediate back to the requested output format */
+    if (s->sample_fmt[1] != AV_SAMPLE_FMT_S16) {
+        int istride[1] = { 2 };
+        int ostride[1] = { s->sample_size[1] };
+        const void *ibuf[1] = { output };
+        void *obuf[1] = { output_bak };
+
+        if (av_audio_convert(s->convert_ctx[1], obuf, ostride,
+                             ibuf, istride, nb_samples1 * s->output_channels) < 0) {
+            av_log(s->resample_context, AV_LOG_ERROR,
+                   "Audio sample format conversion failed\n");
+            /* NOTE(review): returning here skips the cleanup under 'fail:'
+             * and leaks all bufin[]/bufout[] allocations. */
+            return 0;
+        }
+    }
+
+fail:
+    for (i = 0; i < s->filter_channels; i++) {
+        av_free(bufin[i]);
+        av_free(bufout[i]);
+    }
+
+    return nb_samples1;
+}
+
+/* Free a ReSampleContext and everything it owns (resampler, carry-over
+ * buffers, conversion buffers and converters). */
+void audio_resample_close(ReSampleContext *s)
+{
+    int i;
+    av_resample_close(s->resample_context);
+    for (i = 0; i < s->filter_channels; i++)
+        av_freep(&s->temp[i]);
+    av_freep(&s->buffer[0]);
+    av_freep(&s->buffer[1]);
+    av_audio_convert_free(s->convert_ctx[0]);
+    av_audio_convert_free(s->convert_ctx[1]);
+    av_free(s);
+}
+
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
diff --git a/media/ffvpx/libavcodec/resample2.c b/media/ffvpx/libavcodec/resample2.c
new file mode 100644
index 000000000..56ae9f722
--- /dev/null
+++ b/media/ffvpx/libavcodec/resample2.c
@@ -0,0 +1,319 @@
+/*
+ * audio resampling
+ * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * audio resampling
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include "libavutil/avassert.h"
+#include "avcodec.h"
+#include "libavutil/common.h"
+
+#if FF_API_AVCODEC_RESAMPLE
+
+#ifndef CONFIG_RESAMPLE_HP
+#define FILTER_SHIFT 15
+
+typedef int16_t FELEM;
+typedef int32_t FELEM2;
+typedef int64_t FELEML;
+#define FELEM_MAX INT16_MAX
+#define FELEM_MIN INT16_MIN
+#define WINDOW_TYPE 9
+#elif !defined(CONFIG_RESAMPLE_AUDIOPHILE_KIDDY_MODE)
+#define FILTER_SHIFT 30
+
+#define FELEM int32_t
+#define FELEM2 int64_t
+#define FELEML int64_t
+#define FELEM_MAX INT32_MAX
+#define FELEM_MIN INT32_MIN
+#define WINDOW_TYPE 12
+#else
+#define FILTER_SHIFT 0
+
+typedef double FELEM;
+typedef double FELEM2;
+typedef double FELEML;
+#define WINDOW_TYPE 24
+#endif
+
+
+typedef struct AVResampleContext{
+    const AVClass *av_class;
+    FELEM *filter_bank; ///< polyphase FIR coefficients, (phase_count+1) phases of filter_length taps
+    int filter_length; ///< taps per phase
+    int ideal_dst_incr; ///< nominal output step (restored after compensation)
+    int dst_incr; ///< current output step, in src_incr units
+    int index; ///< input position in phase units (may be negative at start)
+    int frac; ///< fractional position accumulator, modulo src_incr
+    int src_incr;
+    int compensation_distance; ///< remaining samples of tempo compensation; 0 = none
+    int phase_shift; ///< log2(phase_count)
+    int phase_mask; ///< phase_count - 1
+    int linear; ///< if set, linearly interpolate between adjacent phase filters
+}AVResampleContext;
+
+/**
+ * 0th order modified bessel function of the first kind.
+ */
+static double bessel(double x){
+    double v=1;
+    double lastv=0;
+    double t=1;
+    int i;
+
+    /* power-series evaluation of I0(x); each term multiplies the previous by
+     * (x/2)^2 / i^2, iterated until the sum stops changing in double precision */
+    x= x*x/4;
+    for(i=1; v != lastv; i++){
+        lastv=v;
+        t *= x/(i*i);
+        v += t;
+    }
+    return v;
+}
+
+/**
+ * Build a polyphase filterbank.
+ * @param factor resampling factor
+ * @param scale wanted sum of coefficients for each filter
+ * @param type 0->cubic, 1->blackman nuttall windowed sinc, 2..16->kaiser windowed sinc beta=2..16
+ * @return 0 on success, negative on error
+ */
+static int build_filter(FELEM *filter, double factor, int tap_count, int phase_count, int scale, int type){
+    int ph, i;
+    double x, y, w;
+    double *tab = av_malloc_array(tap_count, sizeof(*tab));
+    const int center= (tap_count-1)/2;
+
+    if (!tab)
+        return AVERROR(ENOMEM);
+
+    /* if upsampling, only need to interpolate, no filter */
+    if (factor > 1.0)
+        factor = 1.0;
+
+    /* one windowed-sinc filter per phase; phase ph is offset by ph/phase_count
+     * of an input sample */
+    for(ph=0;ph<phase_count;ph++) {
+        double norm = 0;
+        for(i=0;i<tap_count;i++) {
+            x = M_PI * ((double)(i - center) - (double)ph / phase_count) * factor;
+            if (x == 0) y = 1.0;
+            else        y = sin(x) / x;
+            switch(type){
+            case 0:{
+                const float d= -0.5; //first order derivative = -0.5
+                x = fabs(((double)(i - center) - (double)ph / phase_count) * factor);
+                if(x<1.0) y= 1 - 3*x*x + 2*x*x*x + d*(            -x*x + x*x*x);
+                else      y=                       d*(-4 + 8*x - 5*x*x + x*x*x);
+                break;}
+            case 1:
+                /* Blackman-Nuttall window */
+                w = 2.0*x / (factor*tap_count) + M_PI;
+                y *= 0.3635819 - 0.4891775 * cos(w) + 0.1365995 * cos(2*w) - 0.0106411 * cos(3*w);
+                break;
+            default:
+                /* Kaiser window, beta == type */
+                w = 2.0*x / (factor*tap_count*M_PI);
+                y *= bessel(type*sqrt(FFMAX(1-w*w, 0)));
+                break;
+            }
+
+            tab[i] = y;
+            norm += y;
+        }
+
+        /* normalize so that a constant (DC) input keeps its level */
+        for(i=0;i<tap_count;i++) {
+#ifdef CONFIG_RESAMPLE_AUDIOPHILE_KIDDY_MODE
+            filter[ph * tap_count + i] = tab[i] / norm;
+#else
+            filter[ph * tap_count + i] = av_clip(lrintf(tab[i] * scale / norm), FELEM_MIN, FELEM_MAX);
+#endif
+        }
+    }
+#if 0
+    {
+#define LEN 1024
+        int j,k;
+        double sine[LEN + tap_count];
+        double filtered[LEN];
+        double maxff=-2, minff=2, maxsf=-2, minsf=2;
+        for(i=0; i<LEN; i++){
+            double ss=0, sf=0, ff=0;
+            for(j=0; j<LEN+tap_count; j++)
+                sine[j]= cos(i*j*M_PI/LEN);
+            for(j=0; j<LEN; j++){
+                double sum=0;
+                ph=0;
+                for(k=0; k<tap_count; k++)
+                    sum += filter[ph * tap_count + k] * sine[k+j];
+                filtered[j]= sum / (1<<FILTER_SHIFT);
+                ss+= sine[j + center] * sine[j + center];
+                ff+= filtered[j] * filtered[j];
+                sf+= sine[j + center] * filtered[j];
+            }
+            ss= sqrt(2*ss/LEN);
+            ff= sqrt(2*ff/LEN);
+            sf= 2*sf/LEN;
+            maxff= FFMAX(maxff, ff);
+            minff= FFMIN(minff, ff);
+            maxsf= FFMAX(maxsf, sf);
+            minsf= FFMIN(minsf, sf);
+            if(i%11==0){
+                av_log(NULL, AV_LOG_ERROR, "i:%4d ss:%f ff:%13.6e-%13.6e sf:%13.6e-%13.6e\n", i, ss, maxff, minff, maxsf, minsf);
+                minff=minsf= 2;
+                maxff=maxsf= -2;
+            }
+        }
+    }
+#endif
+
+    av_free(tab);
+    return 0;
+}
+
+/* Allocate an AVResampleContext converting in_rate -> out_rate.
+ * Returns NULL on allocation/initialization failure. */
+AVResampleContext *av_resample_init(int out_rate, int in_rate, int filter_size, int phase_shift, int linear, double cutoff){
+    AVResampleContext *c= av_mallocz(sizeof(AVResampleContext));
+    double factor= FFMIN(out_rate * cutoff / in_rate, 1.0);
+    int phase_count= 1<<phase_shift;
+
+    if (!c)
+        return NULL;
+
+    c->phase_shift= phase_shift;
+    c->phase_mask= phase_count-1;
+    c->linear= linear;
+
+    /* widen the filter when downsampling so the cutoff scales with factor */
+    c->filter_length= FFMAX((int)ceil(filter_size/factor), 1);
+    c->filter_bank= av_mallocz_array(c->filter_length, (phase_count+1)*sizeof(FELEM));
+    if (!c->filter_bank)
+        goto error;
+    if (build_filter(c->filter_bank, factor, c->filter_length, phase_count, 1<<FILTER_SHIFT, WINDOW_TYPE))
+        goto error;
+    /* duplicate phase 0 at the end so the linear-interpolation path can read
+     * filter[i + filter_length] without extra bounds handling */
+    memcpy(&c->filter_bank[c->filter_length*phase_count+1], c->filter_bank, (c->filter_length-1)*sizeof(FELEM));
+    c->filter_bank[c->filter_length*phase_count]= c->filter_bank[c->filter_length - 1];
+
+    /* express the rate ratio as src_incr/dst_incr in phase units */
+    if(!av_reduce(&c->src_incr, &c->dst_incr, out_rate, in_rate * (int64_t)phase_count, INT32_MAX/2))
+        goto error;
+    c->ideal_dst_incr= c->dst_incr;
+
+    /* start centered on the filter */
+    c->index= -phase_count*((c->filter_length-1)/2);
+
+    return c;
+error:
+    av_free(c->filter_bank);
+    av_free(c);
+    return NULL;
+}
+
+/* Free the filter bank and the context itself. */
+void av_resample_close(AVResampleContext *c){
+    av_freep(&c->filter_bank);
+    av_freep(&c);
+}
+
+/* Adjust the output step so that sample_delta extra/fewer samples are produced
+ * over the next compensation_distance output samples (A/V drift correction). */
+void av_resample_compensate(AVResampleContext *c, int sample_delta, int compensation_distance){
+//    sample_delta += (c->ideal_dst_incr - c->dst_incr)*(int64_t)c->compensation_distance / c->ideal_dst_incr;
+    c->compensation_distance= compensation_distance;
+    c->dst_incr = c->ideal_dst_incr - c->ideal_dst_incr * (int64_t)sample_delta / compensation_distance;
+}
+
+/* Resample up to src_size input samples into dst (at most dst_size outputs).
+ * Returns the number of output samples written; *consumed receives the number
+ * of input samples used. Context state is only saved when update_ctx is set
+ * (so multiple channels can share one context, committing on the last one). */
+int av_resample(AVResampleContext *c, short *dst, short *src, int *consumed, int src_size, int dst_size, int update_ctx){
+    int dst_index, i;
+    int index= c->index;
+    int frac= c->frac;
+    int dst_incr_frac= c->dst_incr % c->src_incr;
+    int dst_incr=      c->dst_incr / c->src_incr;
+    int compensation_distance= c->compensation_distance;
+
+    /* fast path: 1-tap filter with no phases means no filtering at all —
+     * nearest-sample copy using a 32.32 fixed-point position */
+    if(compensation_distance == 0 && c->filter_length == 1 && c->phase_shift==0){
+        int64_t index2= ((int64_t)index)<<32;
+        int64_t incr= (1LL<<32) * c->dst_incr / c->src_incr;
+        dst_size= FFMIN(dst_size, (src_size-1-index) * (int64_t)c->src_incr / c->dst_incr);
+
+        for(dst_index=0; dst_index < dst_size; dst_index++){
+            dst[dst_index] = src[index2>>32];
+            index2 += incr;
+        }
+        index += dst_index * dst_incr;
+        index += (frac + dst_index * (int64_t)dst_incr_frac) / c->src_incr;
+        frac   = (frac + dst_index * (int64_t)dst_incr_frac) % c->src_incr;
+    }else{
+        for(dst_index=0; dst_index < dst_size; dst_index++){
+            /* pick the polyphase sub-filter for the current fractional position */
+            FELEM *filter= c->filter_bank + c->filter_length*(index & c->phase_mask);
+            int sample_index= index >> c->phase_shift;
+            FELEM2 val=0;
+
+            if(sample_index < 0){
+                /* before the start of the buffer: mirror the input */
+                for(i=0; i<c->filter_length; i++)
+                    val += src[FFABS(sample_index + i) % src_size] * filter[i];
+            }else if(sample_index + c->filter_length > src_size){
+                /* not enough input left for a full filter window */
+                break;
+            }else if(c->linear){
+                /* interpolate between this phase and the next, weighted by frac */
+                FELEM2 v2=0;
+                for(i=0; i<c->filter_length; i++){
+                    val += src[sample_index + i] * (FELEM2)filter[i];
+                    v2  += src[sample_index + i] * (FELEM2)filter[i + c->filter_length];
+                }
+                val+=(v2-val)*(FELEML)frac / c->src_incr;
+            }else{
+                for(i=0; i<c->filter_length; i++){
+                    val += src[sample_index + i] * (FELEM2)filter[i];
+                }
+            }
+
+#ifdef CONFIG_RESAMPLE_AUDIOPHILE_KIDDY_MODE
+            dst[dst_index] = av_clip_int16(lrintf(val));
+#else
+            /* round, rescale and branchlessly clip to int16 */
+            val = (val + (1<<(FILTER_SHIFT-1)))>>FILTER_SHIFT;
+            dst[dst_index] = (unsigned)(val + 32768) > 65535 ? (val>>31) ^ 32767 : val;
+#endif
+
+            frac += dst_incr_frac;
+            index += dst_incr;
+            if(frac >= c->src_incr){
+                frac -= c->src_incr;
+                index++;
+            }
+
+            /* compensation finished: restore the nominal step */
+            if(dst_index + 1 == compensation_distance){
+                compensation_distance= 0;
+                dst_incr_frac= c->ideal_dst_incr % c->src_incr;
+                dst_incr=      c->ideal_dst_incr / c->src_incr;
+            }
+        }
+    }
+    /* input samples fully consumed so far; keep only the phase fraction of index */
+    *consumed= FFMAX(index, 0) >> c->phase_shift;
+    if(index>=0) index &= c->phase_mask;
+
+    if(compensation_distance){
+        compensation_distance -= dst_index;
+        av_assert2(compensation_distance > 0);
+    }
+    if(update_ctx){
+        c->frac= frac;
+        c->index= index;
+        c->dst_incr= dst_incr_frac + c->src_incr*dst_incr;
+        c->compensation_distance= compensation_distance;
+    }
+
+    return dst_index;
+}
+
+#endif
diff --git a/media/ffvpx/libavcodec/reverse.c b/media/ffvpx/libavcodec/reverse.c
new file mode 100644
index 000000000..440badaf3
--- /dev/null
+++ b/media/ffvpx/libavcodec/reverse.c
@@ -0,0 +1 @@
+#include "libavutil/reverse.c"
diff --git a/media/ffvpx/libavcodec/rl.h b/media/ffvpx/libavcodec/rl.h
new file mode 100644
index 000000000..9a767bc5f
--- /dev/null
+++ b/media/ffvpx/libavcodec/rl.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2000-2002 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * rl header.
+ */
+
+#ifndef AVCODEC_RL_H
+#define AVCODEC_RL_H
+
+#include <stdint.h>
+
+#include "vlc.h"
+
+/* run length table */
+#define MAX_RUN 64
+#define MAX_LEVEL 64
+
+/** RLTable. */
+typedef struct RLTable {
+ int n; ///< number of entries of table_vlc minus 1
+ int last; ///< number of values for last = 0
+ const uint16_t (*table_vlc)[2];
+ const int8_t *table_run;
+ const int8_t *table_level;
+ uint8_t *index_run[2]; ///< encoding only
+ int8_t *max_level[2]; ///< encoding & decoding
+ int8_t *max_run[2]; ///< encoding & decoding
+ RL_VLC_ELEM *rl_vlc[32]; ///< decoding only
+} RLTable;
+
+/**
+ * @param static_store static uint8_t array[2][2*MAX_RUN + MAX_LEVEL + 3] which will hold
+ * the level and run tables, if this is NULL av_malloc() will be used
+ */
+int ff_rl_init(RLTable *rl, uint8_t static_store[2][2*MAX_RUN + MAX_LEVEL + 3]);
+void ff_rl_init_vlc(RLTable *rl, unsigned static_size);
+
+/**
+ * Free the contents of a dynamically allocated table.
+ */
+void ff_rl_free(RLTable *rl);
+
+#define INIT_VLC_RL(rl, static_size)\
+{\
+ int q;\
+ static RL_VLC_ELEM rl_vlc_table[32][static_size];\
+\
+ if(!rl.rl_vlc[0]){\
+ for(q=0; q<32; q++)\
+ rl.rl_vlc[q]= rl_vlc_table[q];\
+\
+ ff_rl_init_vlc(&rl, static_size);\
+ }\
+}
+
+/* Map a (last, run, level) triple to its VLC code index; returns rl->n
+ * (the escape index) when the combination has no dedicated code. */
+static inline int get_rl_index(const RLTable *rl, int last, int run, int level)
+{
+    int index;
+    index = rl->index_run[last][run];
+    if (index >= rl->n)
+        return rl->n;
+    if (level > rl->max_level[last][run])
+        return rl->n;
+    return index + level - 1;
+}
+
+#endif /* AVCODEC_RL_H */
diff --git a/media/ffvpx/libavcodec/rnd_avg.h b/media/ffvpx/libavcodec/rnd_avg.h
new file mode 100644
index 000000000..344775e31
--- /dev/null
+++ b/media/ffvpx/libavcodec/rnd_avg.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2001-2003 BERO <bero@geocities.co.jp>
+ * Copyright (c) 2011 Oskar Arvidsson
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_RND_AVG_H
+#define AVCODEC_RND_AVG_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define BYTE_VEC32(c) ((c) * 0x01010101UL)
+#define BYTE_VEC64(c) ((c) * 0x0001000100010001UL)
+
+/* Per-byte average of two packed 32-bit values, rounding halves up. */
+static inline uint32_t rnd_avg32(uint32_t a, uint32_t b)
+{
+    return (a | b) - (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1);
+}
+
+/* Per-byte average of two packed 32-bit values, rounding halves down. */
+static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b)
+{
+    return (a & b) + (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1);
+}
+
+/* Per-byte average of two packed 64-bit values, rounding halves up. */
+static inline uint64_t rnd_avg64(uint64_t a, uint64_t b)
+{
+    return (a | b) - (((a ^ b) & ~BYTE_VEC64(0x01)) >> 1);
+}
+
+/* Per-byte average of two packed 64-bit values, rounding halves down. */
+static inline uint64_t no_rnd_avg64(uint64_t a, uint64_t b)
+{
+    return (a & b) + (((a ^ b) & ~BYTE_VEC64(0x01)) >> 1);
+}
+
+#endif /* AVCODEC_RND_AVG_H */
diff --git a/media/ffvpx/libavcodec/thread.h b/media/ffvpx/libavcodec/thread.h
new file mode 100644
index 000000000..c848d7ae8
--- /dev/null
+++ b/media/ffvpx/libavcodec/thread.h
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2008 Alexander Strange <astrange@ithinksw.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Multithreading support functions
+ * @author Alexander Strange <astrange@ithinksw.com>
+ */
+
+#ifndef AVCODEC_THREAD_H
+#define AVCODEC_THREAD_H
+
+#include "libavutil/buffer.h"
+
+#include "config.h"
+#include "avcodec.h"
+
/**
 * Frame wrapper used by the frame-threading code.
 * Pairs the frame with the context that owns its buffers and with a
 * shared progress buffer used by ff_thread_report_progress() /
 * ff_thread_await_progress().
 */
typedef struct ThreadFrame {
    AVFrame *f;             ///< the frame itself
    AVCodecContext *owner;  ///< context whose thread allocated/owns f's buffers
    // progress->data is an array of 2 ints holding progress for top/bottom
    // fields
    AVBufferRef *progress;
} ThreadFrame;
+
+/**
+ * Wait for decoding threads to finish and reset internal state.
+ * Called by avcodec_flush_buffers().
+ *
+ * @param avctx The context.
+ */
+void ff_thread_flush(AVCodecContext *avctx);
+
+/**
+ * Submit a new frame to a decoding thread.
+ * Returns the next available frame in picture. *got_picture_ptr
+ * will be 0 if none is available.
+ * The return value on success is the size of the consumed packet for
+ * compatibility with avcodec_decode_video2(). This means the decoder
+ * has to consume the full packet.
+ *
+ * Parameters are the same as avcodec_decode_video2().
+ */
+int ff_thread_decode_frame(AVCodecContext *avctx, AVFrame *picture,
+ int *got_picture_ptr, AVPacket *avpkt);
+
+/**
+ * If the codec defines update_thread_context(), call this
+ * when they are ready for the next thread to start decoding
+ * the next frame. After calling it, do not change any variables
+ * read by the update_thread_context() method, or call ff_thread_get_buffer().
+ *
+ * @param avctx The context.
+ */
+void ff_thread_finish_setup(AVCodecContext *avctx);
+
+/**
+ * Notify later decoding threads when part of their reference picture is ready.
+ * Call this when some part of the picture is finished decoding.
+ * Later calls with lower values of progress have no effect.
+ *
+ * @param f The picture being decoded.
+ * @param progress Value, in arbitrary units, of how much of the picture has decoded.
+ * @param field The field being decoded, for field-picture codecs.
+ * 0 for top field or frame pictures, 1 for bottom field.
+ */
+void ff_thread_report_progress(ThreadFrame *f, int progress, int field);
+
+/**
+ * Wait for earlier decoding threads to finish reference pictures.
+ * Call this before accessing some part of a picture, with a given
+ * value for progress, and it will return after the responsible decoding
+ * thread calls ff_thread_report_progress() with the same or
+ * higher value for progress.
+ *
+ * @param f The picture being referenced.
+ * @param progress Value, in arbitrary units, to wait for.
+ * @param field The field being referenced, for field-picture codecs.
+ * 0 for top field or frame pictures, 1 for bottom field.
+ */
+void ff_thread_await_progress(ThreadFrame *f, int progress, int field);
+
+/**
+ * Wrapper around get_format() for frame-multithreaded codecs.
+ * Call this function instead of avctx->get_format().
+ * Cannot be called after the codec has called ff_thread_finish_setup().
+ *
+ * @param avctx The current context.
+ * @param fmt The list of available formats.
+ */
+enum AVPixelFormat ff_thread_get_format(AVCodecContext *avctx, const enum AVPixelFormat *fmt);
+
+/**
+ * Wrapper around get_buffer() for frame-multithreaded codecs.
+ * Call this function instead of ff_get_buffer(f).
+ * Cannot be called after the codec has called ff_thread_finish_setup().
+ *
+ * @param avctx The current context.
+ * @param f The frame to write into.
+ */
+int ff_thread_get_buffer(AVCodecContext *avctx, ThreadFrame *f, int flags);
+
+/**
+ * Wrapper around release_buffer() for frame-multithreaded codecs.
+ * Call this function instead of avctx->release_buffer(f).
+ * The AVFrame will be copied and the actual release_buffer() call
+ * will be performed later. The contents of data pointed to by the
+ * AVFrame should not be changed until ff_thread_get_buffer() is called
+ * on it.
+ *
+ * @param avctx The current context.
+ * @param f The picture being released.
+ */
+void ff_thread_release_buffer(AVCodecContext *avctx, ThreadFrame *f);
+
+int ff_thread_ref_frame(ThreadFrame *dst, ThreadFrame *src);
+
+int ff_thread_init(AVCodecContext *s);
+void ff_thread_free(AVCodecContext *s);
+
+int ff_alloc_entries(AVCodecContext *avctx, int count);
+void ff_reset_entries(AVCodecContext *avctx);
+void ff_thread_report_progress2(AVCodecContext *avctx, int field, int thread, int n);
+void ff_thread_await_progress2(AVCodecContext *avctx, int field, int thread, int shift);
+
+#endif /* AVCODEC_THREAD_H */
diff --git a/media/ffvpx/libavcodec/unary.h b/media/ffvpx/libavcodec/unary.h
new file mode 100644
index 000000000..908dc9350
--- /dev/null
+++ b/media/ffvpx/libavcodec/unary.h
@@ -0,0 +1,56 @@
+/*
+ * copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_UNARY_H
+#define AVCODEC_UNARY_H
+
+#include "get_bits.h"
+
+/**
+ * Get unary code of limited length
+ * @param gb GetBitContext
+ * @param[in] stop The bitstop value (unary code of 1's or 0's)
+ * @param[in] len Maximum length
+ * @return Unary length/index
+ */
+static inline int get_unary(GetBitContext *gb, int stop, int len)
+{
+ int i;
+
+ for(i = 0; i < len && get_bits1(gb) != stop; i++);
+ return i;
+}
+
/**
 * Get unary code terminated by a 0 with a maximum length of 33.
 * @param gb GetBitContext
 * @return Unary length/index, in the range 0..33
 */
static inline int get_unary_0_33(GetBitContext *gb)
{
    return get_unary(gb, 0, 33);
}
+
/**
 * Get unary code terminated by a 0 with a maximum length of 9.
 * @param gb GetBitContext
 * @return Unary length/index, in the range 0..9
 */
static inline int get_unary_0_9(GetBitContext *gb)
{
    return get_unary(gb, 0, 9);
}
+
+#endif /* AVCODEC_UNARY_H */
diff --git a/media/ffvpx/libavcodec/utils.c b/media/ffvpx/libavcodec/utils.c
new file mode 100644
index 000000000..f7adb525f
--- /dev/null
+++ b/media/ffvpx/libavcodec/utils.c
@@ -0,0 +1,4230 @@
+/*
+ * utils for libavcodec
+ * Copyright (c) 2001 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * utils.
+ */
+
+#include "config.h"
+#include "libavutil/atomic.h"
+#include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
+#include "libavutil/avstring.h"
+#include "libavutil/bprint.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/crc.h"
+#include "libavutil/frame.h"
+#include "libavutil/hwcontext.h"
+#include "libavutil/internal.h"
+#include "libavutil/mathematics.h"
+#include "libavutil/mem_internal.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/samplefmt.h"
+#include "libavutil/dict.h"
+#include "libavutil/thread.h"
+#include "avcodec.h"
+#include "libavutil/opt.h"
+#include "me_cmp.h"
+#include "mpegvideo.h"
+#include "thread.h"
+#include "frame_thread_encoder.h"
+#include "internal.h"
+#include "raw.h"
+#include "bytestream.h"
+#include "version.h"
+#include <stdlib.h>
+#include <stdarg.h>
+#include <limits.h>
+#include <float.h>
+#if CONFIG_ICONV
+# include <iconv.h>
+#endif
+
+#include "libavutil/ffversion.h"
+const char av_codec_ffversion[] = "FFmpeg version " FFMPEG_VERSION;
+
#if HAVE_PTHREADS || HAVE_W32THREADS || HAVE_OS2THREADS
/**
 * Default lock manager used when a threading implementation is available.
 *
 * The pthread mutex is created lazily on the first AV_LOCK_OBTAIN rather
 * than at AV_LOCK_CREATE; publication of the newly created mutex is done
 * with an atomic pointer CAS so that concurrent first-obtainers agree on
 * a single mutex (the loser destroys its own copy).
 *
 * @return 0 on success, an AVERROR code on failure, 1 on unknown op.
 */
static int default_lockmgr_cb(void **arg, enum AVLockOp op)
{
    void * volatile * mutex = arg;
    int err;

    switch (op) {
    case AV_LOCK_CREATE:
        // creation is deferred to the first AV_LOCK_OBTAIN
        return 0;
    case AV_LOCK_OBTAIN:
        if (!*mutex) {
            pthread_mutex_t *tmp = av_malloc(sizeof(pthread_mutex_t));
            if (!tmp)
                return AVERROR(ENOMEM);
            if ((err = pthread_mutex_init(tmp, NULL))) {
                av_free(tmp);
                return AVERROR(err);
            }
            // Publish atomically; if another thread already published a
            // mutex, discard ours and use the published one below.
            if (avpriv_atomic_ptr_cas(mutex, NULL, tmp)) {
                pthread_mutex_destroy(tmp);
                av_free(tmp);
            }
        }

        if ((err = pthread_mutex_lock(*mutex)))
            return AVERROR(err);

        return 0;
    case AV_LOCK_RELEASE:
        if ((err = pthread_mutex_unlock(*mutex)))
            return AVERROR(err);

        return 0;
    case AV_LOCK_DESTROY:
        if (*mutex)
            pthread_mutex_destroy(*mutex);
        av_free(*mutex);
        // clear the slot so a later AV_LOCK_OBTAIN can recreate the mutex
        avpriv_atomic_ptr_cas(mutex, *mutex, NULL);
        return 0;
    }
    return 1; // unknown operation
}
// Active lock manager; presumably replaceable via av_lockmgr_register()
// (registration function not visible in this chunk).
static int (*lockmgr_cb)(void **mutex, enum AVLockOp op) = default_lockmgr_cb;
#else
static int (*lockmgr_cb)(void **mutex, enum AVLockOp op) = NULL;
#endif
+
+
+volatile int ff_avcodec_locked;
+static int volatile entangled_thread_counter = 0;
+static void *codec_mutex;
+static void *avformat_mutex;
+
+void av_fast_padded_malloc(void *ptr, unsigned int *size, size_t min_size)
+{
+ uint8_t **p = ptr;
+ if (min_size > SIZE_MAX - AV_INPUT_BUFFER_PADDING_SIZE) {
+ av_freep(p);
+ *size = 0;
+ return;
+ }
+ if (!ff_fast_malloc(p, size, min_size + AV_INPUT_BUFFER_PADDING_SIZE, 1))
+ memset(*p + min_size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+}
+
+void av_fast_padded_mallocz(void *ptr, unsigned int *size, size_t min_size)
+{
+ uint8_t **p = ptr;
+ if (min_size > SIZE_MAX - AV_INPUT_BUFFER_PADDING_SIZE) {
+ av_freep(p);
+ *size = 0;
+ return;
+ }
+ if (!ff_fast_malloc(p, size, min_size + AV_INPUT_BUFFER_PADDING_SIZE, 1))
+ memset(*p, 0, min_size + AV_INPUT_BUFFER_PADDING_SIZE);
+}
+
+/* encoder management */
+static AVCodec *first_avcodec = NULL;
+static AVCodec **last_avcodec = &first_avcodec;
+
+AVCodec *av_codec_next(const AVCodec *c)
+{
+ if (c)
+ return c->next;
+ else
+ return first_avcodec;
+}
+
+static av_cold void avcodec_init(void)
+{
+ static int initialized = 0;
+
+ if (initialized != 0)
+ return;
+ initialized = 1;
+
+ if (CONFIG_ME_CMP)
+ ff_me_cmp_init_static();
+}
+
+int av_codec_is_encoder(const AVCodec *codec)
+{
+ return codec && (codec->encode_sub || codec->encode2 ||codec->send_frame);
+}
+
+int av_codec_is_decoder(const AVCodec *codec)
+{
+ return codec && (codec->decode || codec->send_packet);
+}
+
/**
 * Register a codec by appending it to the global codec list.
 *
 * The append uses an atomic pointer CAS so concurrent registrations do
 * not corrupt the list: if the cached tail slot is already taken, walk
 * forward and retry.
 */
av_cold void avcodec_register(AVCodec *codec)
{
    AVCodec **p;
    avcodec_init();
    p = last_avcodec;
    codec->next = NULL;

    // NOTE(review): relies on avpriv_atomic_ptr_cas() returning the
    // previous value of *p, so a NULL return means the append succeeded
    // and the loop terminates — confirm against the atomic helpers.
    while(*p || avpriv_atomic_ptr_cas((void * volatile *)p, NULL, codec))
        p = &(*p)->next;
    last_avcodec = &codec->next;

    // run per-codec static initialization once, at registration time
    if (codec->init_static_data)
        codec->init_static_data(codec);
}
+
#if FF_API_EMU_EDGE
/**
 * Return the fixed EDGE_WIDTH constant.
 * Kept only while the FF_API_EMU_EDGE compatibility gate is enabled.
 */
unsigned avcodec_get_edge_width(void)
{
    return EDGE_WIDTH;
}
#endif
+
#if FF_API_SET_DIMENSIONS
/**
 * Compatibility wrapper around ff_set_dimensions(); failures are only
 * logged, not propagated. Kept while FF_API_SET_DIMENSIONS is enabled.
 */
void avcodec_set_dimensions(AVCodecContext *s, int width, int height)
{
    int ret = ff_set_dimensions(s, width, height);
    if (ret < 0) {
        av_log(s, AV_LOG_WARNING, "Failed to set dimensions %d %d\n", width, height);
    }
}
#endif
+
+int ff_set_dimensions(AVCodecContext *s, int width, int height)
+{
+ int ret = av_image_check_size(width, height, 0, s);
+
+ if (ret < 0)
+ width = height = 0;
+
+ s->coded_width = width;
+ s->coded_height = height;
+ s->width = AV_CEIL_RSHIFT(width, s->lowres);
+ s->height = AV_CEIL_RSHIFT(height, s->lowres);
+
+ return ret;
+}
+
+int ff_set_sar(AVCodecContext *avctx, AVRational sar)
+{
+ int ret = av_image_check_sar(avctx->width, avctx->height, sar);
+
+ if (ret < 0) {
+ av_log(avctx, AV_LOG_WARNING, "ignoring invalid SAR: %d/%d\n",
+ sar.num, sar.den);
+ avctx->sample_aspect_ratio = (AVRational){ 0, 1 };
+ return ret;
+ } else {
+ avctx->sample_aspect_ratio = sar;
+ }
+ return 0;
+}
+
+int ff_side_data_update_matrix_encoding(AVFrame *frame,
+ enum AVMatrixEncoding matrix_encoding)
+{
+ AVFrameSideData *side_data;
+ enum AVMatrixEncoding *data;
+
+ side_data = av_frame_get_side_data(frame, AV_FRAME_DATA_MATRIXENCODING);
+ if (!side_data)
+ side_data = av_frame_new_side_data(frame, AV_FRAME_DATA_MATRIXENCODING,
+ sizeof(enum AVMatrixEncoding));
+
+ if (!side_data)
+ return AVERROR(ENOMEM);
+
+ data = (enum AVMatrixEncoding*)side_data->data;
+ *data = matrix_encoding;
+
+ return 0;
+}
+
/**
 * Round *width / *height up to alignments required by the current pixel
 * format and codec, and fill linesize_align[] with the per-plane stride
 * alignment.
 *
 * The defaults come from the chroma subsampling of the pixel format;
 * specific formats/codecs below override them with larger values
 * (macroblock sizes, codec block sizes).
 */
void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height,
                               int linesize_align[AV_NUM_DATA_POINTERS])
{
    int i;
    int w_align = 1;
    int h_align = 1;
    AVPixFmtDescriptor const *desc = av_pix_fmt_desc_get(s->pix_fmt);

    // default: align to the chroma subsampling factors
    if (desc) {
        w_align = 1 << desc->log2_chroma_w;
        h_align = 1 << desc->log2_chroma_h;
    }

    switch (s->pix_fmt) {
    case AV_PIX_FMT_YUV420P:
    case AV_PIX_FMT_YUYV422:
    case AV_PIX_FMT_YVYU422:
    case AV_PIX_FMT_UYVY422:
    case AV_PIX_FMT_YUV422P:
    case AV_PIX_FMT_YUV440P:
    case AV_PIX_FMT_YUV444P:
    case AV_PIX_FMT_GBRP:
    case AV_PIX_FMT_GBRAP:
    case AV_PIX_FMT_GRAY8:
    case AV_PIX_FMT_GRAY16BE:
    case AV_PIX_FMT_GRAY16LE:
    case AV_PIX_FMT_YUVJ420P:
    case AV_PIX_FMT_YUVJ422P:
    case AV_PIX_FMT_YUVJ440P:
    case AV_PIX_FMT_YUVJ444P:
    case AV_PIX_FMT_YUVA420P:
    case AV_PIX_FMT_YUVA422P:
    case AV_PIX_FMT_YUVA444P:
    case AV_PIX_FMT_YUV420P9LE:
    case AV_PIX_FMT_YUV420P9BE:
    case AV_PIX_FMT_YUV420P10LE:
    case AV_PIX_FMT_YUV420P10BE:
    case AV_PIX_FMT_YUV420P12LE:
    case AV_PIX_FMT_YUV420P12BE:
    case AV_PIX_FMT_YUV420P14LE:
    case AV_PIX_FMT_YUV420P14BE:
    case AV_PIX_FMT_YUV420P16LE:
    case AV_PIX_FMT_YUV420P16BE:
    case AV_PIX_FMT_YUVA420P9LE:
    case AV_PIX_FMT_YUVA420P9BE:
    case AV_PIX_FMT_YUVA420P10LE:
    case AV_PIX_FMT_YUVA420P10BE:
    case AV_PIX_FMT_YUVA420P16LE:
    case AV_PIX_FMT_YUVA420P16BE:
    case AV_PIX_FMT_YUV422P9LE:
    case AV_PIX_FMT_YUV422P9BE:
    case AV_PIX_FMT_YUV422P10LE:
    case AV_PIX_FMT_YUV422P10BE:
    case AV_PIX_FMT_YUV422P12LE:
    case AV_PIX_FMT_YUV422P12BE:
    case AV_PIX_FMT_YUV422P14LE:
    case AV_PIX_FMT_YUV422P14BE:
    case AV_PIX_FMT_YUV422P16LE:
    case AV_PIX_FMT_YUV422P16BE:
    case AV_PIX_FMT_YUVA422P9LE:
    case AV_PIX_FMT_YUVA422P9BE:
    case AV_PIX_FMT_YUVA422P10LE:
    case AV_PIX_FMT_YUVA422P10BE:
    case AV_PIX_FMT_YUVA422P16LE:
    case AV_PIX_FMT_YUVA422P16BE:
    case AV_PIX_FMT_YUV440P10LE:
    case AV_PIX_FMT_YUV440P10BE:
    case AV_PIX_FMT_YUV440P12LE:
    case AV_PIX_FMT_YUV440P12BE:
    case AV_PIX_FMT_YUV444P9LE:
    case AV_PIX_FMT_YUV444P9BE:
    case AV_PIX_FMT_YUV444P10LE:
    case AV_PIX_FMT_YUV444P10BE:
    case AV_PIX_FMT_YUV444P12LE:
    case AV_PIX_FMT_YUV444P12BE:
    case AV_PIX_FMT_YUV444P14LE:
    case AV_PIX_FMT_YUV444P14BE:
    case AV_PIX_FMT_YUV444P16LE:
    case AV_PIX_FMT_YUV444P16BE:
    case AV_PIX_FMT_YUVA444P9LE:
    case AV_PIX_FMT_YUVA444P9BE:
    case AV_PIX_FMT_YUVA444P10LE:
    case AV_PIX_FMT_YUVA444P10BE:
    case AV_PIX_FMT_YUVA444P16LE:
    case AV_PIX_FMT_YUVA444P16BE:
    case AV_PIX_FMT_GBRP9LE:
    case AV_PIX_FMT_GBRP9BE:
    case AV_PIX_FMT_GBRP10LE:
    case AV_PIX_FMT_GBRP10BE:
    case AV_PIX_FMT_GBRP12LE:
    case AV_PIX_FMT_GBRP12BE:
    case AV_PIX_FMT_GBRP14LE:
    case AV_PIX_FMT_GBRP14BE:
    case AV_PIX_FMT_GBRP16LE:
    case AV_PIX_FMT_GBRP16BE:
    case AV_PIX_FMT_GBRAP12LE:
    case AV_PIX_FMT_GBRAP12BE:
    case AV_PIX_FMT_GBRAP16LE:
    case AV_PIX_FMT_GBRAP16BE:
        w_align = 16; //FIXME assume 16 pixel per macroblock
        h_align = 16 * 2; // interlaced needs 2 macroblocks height
        break;
    case AV_PIX_FMT_YUV411P:
    case AV_PIX_FMT_YUVJ411P:
    case AV_PIX_FMT_UYYVYY411:
        w_align = 32;
        h_align = 16 * 2;
        break;
    case AV_PIX_FMT_YUV410P:
        if (s->codec_id == AV_CODEC_ID_SVQ1) {
            w_align = 64;
            h_align = 64;
        }
        break;
    case AV_PIX_FMT_RGB555:
        if (s->codec_id == AV_CODEC_ID_RPZA) {
            w_align = 4;
            h_align = 4;
        }
        break;
    case AV_PIX_FMT_PAL8:
    case AV_PIX_FMT_BGR8:
    case AV_PIX_FMT_RGB8:
        if (s->codec_id == AV_CODEC_ID_SMC ||
            s->codec_id == AV_CODEC_ID_CINEPAK) {
            w_align = 4;
            h_align = 4;
        }
        if (s->codec_id == AV_CODEC_ID_JV) {
            w_align = 8;
            h_align = 8;
        }
        break;
    case AV_PIX_FMT_BGR24:
        if ((s->codec_id == AV_CODEC_ID_MSZH) ||
            (s->codec_id == AV_CODEC_ID_ZLIB)) {
            w_align = 4;
            h_align = 4;
        }
        break;
    case AV_PIX_FMT_RGB24:
        if (s->codec_id == AV_CODEC_ID_CINEPAK) {
            w_align = 4;
            h_align = 4;
        }
        break;
    default:
        break;
    }

    if (s->codec_id == AV_CODEC_ID_IFF_ILBM) {
        w_align = FFMAX(w_align, 8);
    }

    *width = FFALIGN(*width, w_align);
    *height = FFALIGN(*height, h_align);
    if (s->codec_id == AV_CODEC_ID_H264 || s->lowres) {
        // some of the optimized chroma MC reads one line too much
        // which is also done in mpeg decoders with lowres > 0
        *height += 2;

        // H.264 uses edge emulation for out of frame motion vectors, for this
        // it requires a temporary area large enough to hold a 21x21 block,
        // increasing width ensures that the temporary area is large enough,
        // the next rounded up width is 32
        *width = FFMAX(*width, 32);
    }

    // NOTE(review): only the first 4 entries are set even though the array
    // has AV_NUM_DATA_POINTERS elements — callers appear to use only 0..3.
    for (i = 0; i < 4; i++)
        linesize_align[i] = STRIDE_ALIGN;
}
+
+void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height)
+{
+ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->pix_fmt);
+ int chroma_shift = desc->log2_chroma_w;
+ int linesize_align[AV_NUM_DATA_POINTERS];
+ int align;
+
+ avcodec_align_dimensions2(s, width, height, linesize_align);
+ align = FFMAX(linesize_align[0], linesize_align[3]);
+ linesize_align[1] <<= chroma_shift;
+ linesize_align[2] <<= chroma_shift;
+ align = FFMAX3(align, linesize_align[1], linesize_align[2]);
+ *width = FFALIGN(*width, align);
+}
+
/**
 * Convert an AVChromaLocation into x/y chroma sample positions
 * (1/256 pel units, so 128 = half a luma sample).
 *
 * After the decrement, bit 0 of pos selects horizontal center (x = 128)
 * and the expression ((pos>>1)^(pos<4)) yields 0 or 1 for the vertical
 * offset, matching the AVChromaLocation enum layout.
 *
 * @return 0 on success, AVERROR(EINVAL) for unspecified/out-of-range pos
 */
int avcodec_enum_to_chroma_pos(int *xpos, int *ypos, enum AVChromaLocation pos)
{
    if (pos <= AVCHROMA_LOC_UNSPECIFIED || pos >= AVCHROMA_LOC_NB)
        return AVERROR(EINVAL);
    pos--;

    *xpos  = (pos&1) * 128;
    *ypos  = ((pos>>1)^(pos<4)) * 128;

    return 0;
}
+
+enum AVChromaLocation avcodec_chroma_pos_to_enum(int xpos, int ypos)
+{
+ int pos, xout, yout;
+
+ for (pos = AVCHROMA_LOC_UNSPECIFIED + 1; pos < AVCHROMA_LOC_NB; pos++) {
+ if (avcodec_enum_to_chroma_pos(&xout, &yout, pos) == 0 && xout == xpos && yout == ypos)
+ return pos;
+ }
+ return AVCHROMA_LOC_UNSPECIFIED;
+}
+
/**
 * Point an audio AVFrame's data/extended_data at a caller-supplied
 * buffer, without copying.
 *
 * For planar formats with more channels than AV_NUM_DATA_POINTERS an
 * extended_data array is allocated (and freed again on failure);
 * otherwise extended_data aliases frame->data.
 *
 * @return 0 on success, AVERROR(EINVAL) if buf_size is too small,
 *         AVERROR(ENOMEM) on allocation failure
 */
int avcodec_fill_audio_frame(AVFrame *frame, int nb_channels,
                             enum AVSampleFormat sample_fmt, const uint8_t *buf,
                             int buf_size, int align)
{
    int ch, planar, needed_size, ret = 0;

    // verify the caller's buffer is large enough for the frame layout
    needed_size = av_samples_get_buffer_size(NULL, nb_channels,
                                             frame->nb_samples, sample_fmt,
                                             align);
    if (buf_size < needed_size)
        return AVERROR(EINVAL);

    planar = av_sample_fmt_is_planar(sample_fmt);
    if (planar && nb_channels > AV_NUM_DATA_POINTERS) {
        if (!(frame->extended_data = av_mallocz_array(nb_channels,
                                                      sizeof(*frame->extended_data))))
            return AVERROR(ENOMEM);
    } else {
        frame->extended_data = frame->data;
    }

    // the const cast is intentional: the frame API uses non-const pointers
    if ((ret = av_samples_fill_arrays(frame->extended_data, &frame->linesize[0],
                                      (uint8_t *)(intptr_t)buf, nb_channels, frame->nb_samples,
                                      sample_fmt, align)) < 0) {
        if (frame->extended_data != frame->data)
            av_freep(&frame->extended_data);
        return ret;
    }
    // mirror the first AV_NUM_DATA_POINTERS plane pointers into data[]
    if (frame->extended_data != frame->data) {
        for (ch = 0; ch < AV_NUM_DATA_POINTERS; ch++)
            frame->data[ch] = frame->extended_data[ch];
    }

    return ret;
}
+
/**
 * (Re)configure the context's internal buffer pools to match the given
 * frame's format/dimensions (video) or layout/sample count (audio).
 *
 * A no-op when the pool already matches. On failure all pools are torn
 * down and the pool parameters reset.
 *
 * @return 0 on success, a negative AVERROR code on failure
 */
static int update_frame_pool(AVCodecContext *avctx, AVFrame *frame)
{
    FramePool *pool = avctx->internal->pool;
    int i, ret;

    switch (avctx->codec_type) {
    case AVMEDIA_TYPE_VIDEO: {
        uint8_t *data[4];
        int linesize[4];
        int size[4] = { 0 };
        int w = frame->width;
        int h = frame->height;
        int tmpsize, unaligned;

        // pool already matches this frame geometry — nothing to do
        if (pool->format == frame->format &&
            pool->width == frame->width && pool->height == frame->height)
            return 0;

        avcodec_align_dimensions2(avctx, &w, &h, pool->stride_align);

        // widen w until every plane's linesize meets its stride alignment
        do {
            // NOTE: do not align linesizes individually, this breaks e.g. assumptions
            // that linesize[0] == 2*linesize[1] in the MPEG-encoder for 4:2:2
            ret = av_image_fill_linesizes(linesize, avctx->pix_fmt, w);
            if (ret < 0)
                return ret;
            // increase alignment of w for next try (rhs gives the lowest bit set in w)
            w += w & ~(w - 1);

            unaligned = 0;
            for (i = 0; i < 4; i++)
                unaligned |= linesize[i] % pool->stride_align[i];
        } while (unaligned);

        // compute per-plane sizes from the offsets of a hypothetical buffer
        tmpsize = av_image_fill_pointers(data, avctx->pix_fmt, h,
                                         NULL, linesize);
        if (tmpsize < 0)
            return -1;

        for (i = 0; i < 3 && data[i + 1]; i++)
            size[i] = data[i + 1] - data[i];
        size[i] = tmpsize - (data[i] - data[0]);

        // one buffer pool per plane, with headroom for alignment shifting
        for (i = 0; i < 4; i++) {
            av_buffer_pool_uninit(&pool->pools[i]);
            pool->linesize[i] = linesize[i];
            if (size[i]) {
                pool->pools[i] = av_buffer_pool_init(size[i] + 16 + STRIDE_ALIGN - 1,
                                                     CONFIG_MEMORY_POISONING ?
                                                        NULL :
                                                        av_buffer_allocz);
                if (!pool->pools[i]) {
                    ret = AVERROR(ENOMEM);
                    goto fail;
                }
            }
        }
        pool->format = frame->format;
        pool->width  = frame->width;
        pool->height = frame->height;

        break;
        }
    case AVMEDIA_TYPE_AUDIO: {
        int ch     = av_frame_get_channels(frame); //av_get_channel_layout_nb_channels(frame->channel_layout);
        int planar = av_sample_fmt_is_planar(frame->format);
        int planes = planar ? ch : 1;

        // pool already matches this audio layout — nothing to do
        if (pool->format == frame->format && pool->planes == planes &&
            pool->channels == ch && frame->nb_samples == pool->samples)
            return 0;

        // audio uses a single pool; every plane has the same size
        av_buffer_pool_uninit(&pool->pools[0]);
        ret = av_samples_get_buffer_size(&pool->linesize[0], ch,
                                         frame->nb_samples, frame->format, 0);
        if (ret < 0)
            goto fail;

        pool->pools[0] = av_buffer_pool_init(pool->linesize[0], NULL);
        if (!pool->pools[0]) {
            ret = AVERROR(ENOMEM);
            goto fail;
        }

        pool->format     = frame->format;
        pool->planes     = planes;
        pool->channels   = ch;
        pool->samples = frame->nb_samples;
        break;
        }
    default: av_assert0(0);
    }
    return 0;
fail:
    // tear everything down so a half-configured pool is never used
    for (i = 0; i < 4; i++)
        av_buffer_pool_uninit(&pool->pools[i]);
    pool->format = -1;
    pool->planes = pool->channels = pool->samples = 0;
    pool->width  = pool->height = 0;
    return ret;
}
+
/**
 * Allocate the audio plane buffers for a frame from the (already
 * configured) internal pool. All planes come from pool->pools[0] since
 * every audio plane has the same size.
 *
 * @return 0 on success, AVERROR(ENOMEM) on failure (frame unreferenced)
 */
static int audio_get_buffer(AVCodecContext *avctx, AVFrame *frame)
{
    FramePool *pool = avctx->internal->pool;
    int planes = pool->planes;
    int i;

    frame->linesize[0] = pool->linesize[0];

    if (planes > AV_NUM_DATA_POINTERS) {
        // more planes than data[] can hold: allocate extended arrays
        frame->extended_data = av_mallocz_array(planes, sizeof(*frame->extended_data));
        frame->nb_extended_buf = planes - AV_NUM_DATA_POINTERS;
        frame->extended_buf  = av_mallocz_array(frame->nb_extended_buf,
                                          sizeof(*frame->extended_buf));
        if (!frame->extended_data || !frame->extended_buf) {
            av_freep(&frame->extended_data);
            av_freep(&frame->extended_buf);
            return AVERROR(ENOMEM);
        }
    } else {
        frame->extended_data = frame->data;
        av_assert0(frame->nb_extended_buf == 0);
    }

    // first AV_NUM_DATA_POINTERS planes go into buf[]/data[]
    for (i = 0; i < FFMIN(planes, AV_NUM_DATA_POINTERS); i++) {
        frame->buf[i] = av_buffer_pool_get(pool->pools[0]);
        if (!frame->buf[i])
            goto fail;
        frame->extended_data[i] = frame->data[i] = frame->buf[i]->data;
    }
    // remaining planes go into extended_buf[]/extended_data[]
    for (i = 0; i < frame->nb_extended_buf; i++) {
        frame->extended_buf[i] = av_buffer_pool_get(pool->pools[0]);
        if (!frame->extended_buf[i])
            goto fail;
        frame->extended_data[i + AV_NUM_DATA_POINTERS] = frame->extended_buf[i]->data;
    }

    if (avctx->debug & FF_DEBUG_BUFFERS)
        av_log(avctx, AV_LOG_DEBUG, "default_get_buffer called on frame %p", frame);

    return 0;
fail:
    av_frame_unref(frame);
    return AVERROR(ENOMEM);
}
+
/**
 * Allocate the video plane buffers for a picture from the (already
 * configured) internal per-plane pools, and initialize the palette for
 * PAL/PSEUDOPAL formats.
 *
 * @return 0 on success, a negative value on error (frame unreferenced)
 */
static int video_get_buffer(AVCodecContext *s, AVFrame *pic)
{
    FramePool *pool = s->internal->pool;
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pic->format);
    int i;

    // the caller must hand in a clean frame
    if (pic->data[0] || pic->data[1] || pic->data[2] || pic->data[3]) {
        av_log(s, AV_LOG_ERROR, "pic->data[*]!=NULL in avcodec_default_get_buffer\n");
        return -1;
    }

    if (!desc) {
        av_log(s, AV_LOG_ERROR,
            "Unable to get pixel format descriptor for format %s\n",
            av_get_pix_fmt_name(pic->format));
        return AVERROR(EINVAL);
    }

    memset(pic->data, 0, sizeof(pic->data));
    pic->extended_data = pic->data;

    // one pooled buffer per plane that the pool was configured with
    for (i = 0; i < 4 && pool->pools[i]; i++) {
        pic->linesize[i] = pool->linesize[i];

        pic->buf[i] = av_buffer_pool_get(pool->pools[i]);
        if (!pic->buf[i])
            goto fail;

        pic->data[i] = pic->buf[i]->data;
    }
    // clear the unused plane slots
    for (; i < AV_NUM_DATA_POINTERS; i++) {
        pic->data[i] = NULL;
        pic->linesize[i] = 0;
    }
    // paletted formats get a default systematic palette in data[1]
    if (desc->flags & AV_PIX_FMT_FLAG_PAL ||
        desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL)
        avpriv_set_systematic_pal2((uint32_t *)pic->data[1], pic->format);

    if (s->debug & FF_DEBUG_BUFFERS)
        av_log(s, AV_LOG_DEBUG, "default_get_buffer called on pic %p\n", pic);

    return 0;
fail:
    av_frame_unref(pic);
    return AVERROR(ENOMEM);
}
+
/**
 * Fill every plane of a planar frame with a constant component value
 * c[p], honoring chroma subsampling for planes 1 and 2.
 *
 * NOTE(review): uses desc->comp[0].depth to decide between 8- and 16-bit
 * stores for every plane, and for >8-bit formats writes `bytes` 16-bit
 * samples per row (i.e. `bytes` counts samples there, not bytes) —
 * appears intentional for the planar formats this is used with; confirm
 * against callers.
 */
void ff_color_frame(AVFrame *frame, const int c[4])
{
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
    int p, y, x;

    av_assert0(desc->flags & AV_PIX_FMT_FLAG_PLANAR);

    for (p = 0; p<desc->nb_components; p++) {
        uint8_t *dst = frame->data[p];
        int is_chroma = p == 1 || p == 2;
        // chroma planes are narrower/shorter according to the subsampling
        int bytes  = is_chroma ? AV_CEIL_RSHIFT(frame->width,  desc->log2_chroma_w) : frame->width;
        int height = is_chroma ? AV_CEIL_RSHIFT(frame->height, desc->log2_chroma_h) : frame->height;
        for (y = 0; y < height; y++) {
            if (desc->comp[0].depth >= 9) {
                // >8-bit formats store one uint16_t per sample
                for (x = 0; x<bytes; x++)
                    ((uint16_t*)dst)[x] = c[p];
            }else
                memset(dst, c[p], bytes);
            dst += frame->linesize[p];
        }
    }
}
+
+int avcodec_default_get_buffer2(AVCodecContext *avctx, AVFrame *frame, int flags)
+{
+ int ret;
+
+ if ((ret = update_frame_pool(avctx, frame)) < 0)
+ return ret;
+
+ switch (avctx->codec_type) {
+ case AVMEDIA_TYPE_VIDEO:
+ return video_get_buffer(avctx, frame);
+ case AVMEDIA_TYPE_AUDIO:
+ return audio_get_buffer(avctx, frame);
+ default:
+ return -1;
+ }
+}
+
/**
 * Copy the packet's STRINGS_METADATA side data (a packed dictionary)
 * into the frame's metadata dictionary.
 * @return 0 on success or a negative value from the unpack helper
 */
static int add_metadata_from_side_data(AVPacket *avpkt, AVFrame *frame)
{
    int size;
    const uint8_t *side_metadata;

    AVDictionary **frame_md = avpriv_frame_get_metadatap(frame);

    side_metadata = av_packet_get_side_data(avpkt,
                                            AV_PKT_DATA_STRINGS_METADATA, &size);
    return av_packet_unpack_dictionary(side_metadata, size, frame_md);
}
+
/**
 * Initialize a decoded frame's properties from the packet currently
 * being decoded and from the codec context: packet timing/position,
 * mapped side data, color properties, and media-type-specific fields
 * (dimensions/SAR for video, sample rate/format/channels for audio).
 *
 * @return 0 on success, a negative AVERROR on inconsistent parameters
 */
int ff_init_buffer_info(AVCodecContext *avctx, AVFrame *frame)
{
    AVPacket *pkt = avctx->internal->pkt;
    int i;
    // packet side-data types that are forwarded 1:1 into frame side data
    static const struct {
        enum AVPacketSideDataType packet;
        enum AVFrameSideDataType frame;
    } sd[] = {
        { AV_PKT_DATA_REPLAYGAIN ,                AV_FRAME_DATA_REPLAYGAIN },
        { AV_PKT_DATA_DISPLAYMATRIX,              AV_FRAME_DATA_DISPLAYMATRIX },
        { AV_PKT_DATA_STEREO3D,                   AV_FRAME_DATA_STEREO3D },
        { AV_PKT_DATA_AUDIO_SERVICE_TYPE,         AV_FRAME_DATA_AUDIO_SERVICE_TYPE },
        { AV_PKT_DATA_MASTERING_DISPLAY_METADATA, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA },
    };

    if (pkt) {
        // propagate packet timing/position and copy mapped side data
        frame->pkt_pts = pkt->pts;
        av_frame_set_pkt_pos     (frame, pkt->pos);
        av_frame_set_pkt_duration(frame, pkt->duration);
        av_frame_set_pkt_size    (frame, pkt->size);

        for (i = 0; i < FF_ARRAY_ELEMS(sd); i++) {
            int size;
            uint8_t *packet_sd = av_packet_get_side_data(pkt, sd[i].packet, &size);
            if (packet_sd) {
                AVFrameSideData *frame_sd = av_frame_new_side_data(frame,
                                                                   sd[i].frame,
                                                                   size);
                if (!frame_sd)
                    return AVERROR(ENOMEM);

                memcpy(frame_sd->data, packet_sd, size);
            }
        }
        add_metadata_from_side_data(pkt, frame);
    } else {
        // no packet: mark the packet-derived fields as unknown
        frame->pkt_pts = AV_NOPTS_VALUE;
        av_frame_set_pkt_pos     (frame, -1);
        av_frame_set_pkt_duration(frame, 0);
        av_frame_set_pkt_size    (frame, -1);
    }
    frame->reordered_opaque = avctx->reordered_opaque;

    // fill in color properties the bitstream left unspecified
    if (frame->color_primaries == AVCOL_PRI_UNSPECIFIED)
        frame->color_primaries = avctx->color_primaries;
    if (frame->color_trc == AVCOL_TRC_UNSPECIFIED)
        frame->color_trc = avctx->color_trc;
    if (av_frame_get_colorspace(frame) == AVCOL_SPC_UNSPECIFIED)
        av_frame_set_colorspace(frame, avctx->colorspace);
    if (av_frame_get_color_range(frame) == AVCOL_RANGE_UNSPECIFIED)
        av_frame_set_color_range(frame, avctx->color_range);
    if (frame->chroma_location == AVCHROMA_LOC_UNSPECIFIED)
        frame->chroma_location = avctx->chroma_sample_location;

    switch (avctx->codec->type) {
    case AVMEDIA_TYPE_VIDEO:
        frame->format              = avctx->pix_fmt;
        if (!frame->sample_aspect_ratio.num)
            frame->sample_aspect_ratio = avctx->sample_aspect_ratio;

        if (frame->width && frame->height &&
            av_image_check_sar(frame->width, frame->height,
                               frame->sample_aspect_ratio) < 0) {
            av_log(avctx, AV_LOG_WARNING, "ignoring invalid SAR: %u/%u\n",
                   frame->sample_aspect_ratio.num,
                   frame->sample_aspect_ratio.den);
            frame->sample_aspect_ratio = (AVRational){ 0, 1 };
        }

        break;
    case AVMEDIA_TYPE_AUDIO:
        if (!frame->sample_rate)
            frame->sample_rate    = avctx->sample_rate;
        if (frame->format < 0)
            frame->format         = avctx->sample_fmt;
        if (!frame->channel_layout) {
            if (avctx->channel_layout) {
                 // the context's layout must agree with its channel count
                 if (av_get_channel_layout_nb_channels(avctx->channel_layout) !=
                     avctx->channels) {
                     av_log(avctx, AV_LOG_ERROR, "Inconsistent channel "
                            "configuration.\n");
                     return AVERROR(EINVAL);
                 }

                frame->channel_layout = avctx->channel_layout;
            } else {
                if (avctx->channels > FF_SANE_NB_CHANNELS) {
                    av_log(avctx, AV_LOG_ERROR, "Too many channels: %d.\n",
                           avctx->channels);
                    return AVERROR(ENOSYS);
                }
            }
        }
        av_frame_set_channels(frame, avctx->channels);
        break;
    }
    return 0;
}
+
/**
 * Set the frame properties for a frame about to be returned by a
 * decoder. Thin wrapper around ff_init_buffer_info().
 */
int ff_decode_frame_props(AVCodecContext *avctx, AVFrame *frame)
{
    return ff_init_buffer_info(avctx, frame);
}
+
/**
 * Sanity-check a video frame returned by get_buffer2(): every used plane
 * pointer must be set, and unused plane pointers are warned about and
 * cleared.
 */
static void validate_avframe_allocation(AVCodecContext *avctx, AVFrame *frame)
{
    if (avctx->codec_type == AVMEDIA_TYPE_VIDEO) {
        int i;
        int num_planes = av_pix_fmt_count_planes(frame->format);
        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
        int flags = desc ? desc->flags : 0;
        // paletted formats carry the palette in a second plane
        if (num_planes == 1 && (flags & AV_PIX_FMT_FLAG_PAL))
            num_planes = 2;
        for (i = 0; i < num_planes; i++) {
            av_assert0(frame->data[i]);
        }
        // For now do not enforce anything for palette of pseudopal formats
        if (num_planes == 1 && (flags & AV_PIX_FMT_FLAG_PSEUDOPAL))
            num_planes = 2;
        // For formats without data like hwaccel allow unused pointers to be non-NULL.
        for (i = num_planes; num_planes > 0 && i < FF_ARRAY_ELEMS(frame->data); i++) {
            if (frame->data[i])
                av_log(avctx, AV_LOG_ERROR, "Buffer returned by get_buffer2() did not zero unused plane pointers\n");
            frame->data[i] = NULL;
        }
    }
}
+
/**
 * Core of ff_get_buffer(): validate parameters, fill frame properties,
 * then allocate through the hwaccel's alloc_frame() if one is set, or
 * through avctx->get_buffer2() otherwise.
 *
 * If the caller did not pre-set frame dimensions, the coded dimensions
 * are used for allocation and the display dimensions restored at the end.
 *
 * @return 0 or a positive value on success, a negative AVERROR on error
 */
static int get_buffer_internal(AVCodecContext *avctx, AVFrame *frame, int flags)
{
    const AVHWAccel *hwaccel = avctx->hwaccel;
    int override_dimensions = 1;
    int ret;

    if (avctx->codec_type == AVMEDIA_TYPE_VIDEO) {
        if ((ret = av_image_check_size(avctx->width, avctx->height, 0, avctx)) < 0 || avctx->pix_fmt<0) {
            av_log(avctx, AV_LOG_ERROR, "video_get_buffer: image parameters invalid\n");
            return AVERROR(EINVAL);
        }

        // no caller-provided dimensions: allocate at coded size, restore
        // display size after allocation (override_dimensions = 0)
        if (frame->width <= 0 || frame->height <= 0) {
            frame->width  = FFMAX(avctx->width,  AV_CEIL_RSHIFT(avctx->coded_width,  avctx->lowres));
            frame->height = FFMAX(avctx->height, AV_CEIL_RSHIFT(avctx->coded_height, avctx->lowres));
            override_dimensions = 0;
        }

        if (frame->data[0] || frame->data[1] || frame->data[2] || frame->data[3]) {
            av_log(avctx, AV_LOG_ERROR, "pic->data[*]!=NULL in get_buffer_internal\n");
            return AVERROR(EINVAL);
        }
    }
    ret = ff_decode_frame_props(avctx, frame);
    if (ret < 0)
        return ret;

    if (hwaccel) {
        if (hwaccel->alloc_frame) {
            ret = hwaccel->alloc_frame(avctx, frame);
            goto end;
        }
        // NOTE(review): a hwaccel without alloc_frame falls through to
        // get_buffer2() below without updating sw_pix_fmt.
    } else
        avctx->sw_pix_fmt = avctx->pix_fmt;

    ret = avctx->get_buffer2(avctx, frame, flags);
    if (ret >= 0)
        validate_avframe_allocation(avctx, frame);

end:
    if (avctx->codec_type == AVMEDIA_TYPE_VIDEO && !override_dimensions) {
        frame->width  = avctx->width;
        frame->height = avctx->height;
    }

    return ret;
}
+
+int ff_get_buffer(AVCodecContext *avctx, AVFrame *frame, int flags)
+{
+ int ret = get_buffer_internal(avctx, frame, flags);
+ if (ret < 0) {
+ av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+ frame->width = frame->height = 0;
+ }
+ return ret;
+}
+
+/* Core of ff_reget_buffer(): reuse the frame's existing buffer when its
+ * geometry/format still matches and it is writable; otherwise allocate a
+ * fresh buffer and copy the old contents into it (for codecs that update
+ * the previous picture in place). Video only. */
+static int reget_buffer_internal(AVCodecContext *avctx, AVFrame *frame)
+{
+ AVFrame *tmp;
+ int ret;
+
+ av_assert0(avctx->codec_type == AVMEDIA_TYPE_VIDEO);
+
+ /* Drop the old buffer outright if the stream geometry changed. */
+ if (frame->data[0] && (frame->width != avctx->width || frame->height != avctx->height || frame->format != avctx->pix_fmt)) {
+ av_log(avctx, AV_LOG_WARNING, "Picture changed from size:%dx%d fmt:%s to size:%dx%d fmt:%s in reget buffer()\n",
+ frame->width, frame->height, av_get_pix_fmt_name(frame->format), avctx->width, avctx->height, av_get_pix_fmt_name(avctx->pix_fmt));
+ av_frame_unref(frame);
+ }
+
+ ff_init_buffer_info(avctx, frame);
+
+ if (!frame->data[0])
+ return ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF);
+
+ /* Writable buffer: just refresh the frame properties and keep it. */
+ if (av_frame_is_writable(frame))
+ return ff_decode_frame_props(avctx, frame);
+
+ /* Shared buffer: move it aside, allocate a new one, copy the pixels. */
+ tmp = av_frame_alloc();
+ if (!tmp)
+ return AVERROR(ENOMEM);
+
+ av_frame_move_ref(tmp, frame);
+
+ ret = ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF);
+ if (ret < 0) {
+ av_frame_free(&tmp);
+ return ret;
+ }
+
+ /* NOTE(review): av_frame_copy()'s return value is ignored here; a failed
+ * copy would leave the new buffer with undefined contents — confirm
+ * this is acceptable for all callers. */
+ av_frame_copy(frame, tmp);
+ av_frame_free(&tmp);
+
+ return 0;
+}
+
+/* Public wrapper around reget_buffer_internal() that logs on failure. */
+int ff_reget_buffer(AVCodecContext *avctx, AVFrame *frame)
+{
+ int err = reget_buffer_internal(avctx, frame);
+ if (err >= 0)
+ return err;
+ av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+ return err;
+}
+
+/* Default (serial) implementation of AVCodecContext.execute: run each of
+ * the 'count' jobs one after another on the calling thread, storing every
+ * job's return value into ret[] when the caller asked for them. */
+int avcodec_default_execute(AVCodecContext *c, int (*func)(AVCodecContext *c2, void *arg2), void *arg, int *ret, int count, int size)
+{
+ int job;
+
+ for (job = 0; job < count; job++) {
+ void *job_arg = (char *)arg + job * size;
+ int job_ret = func(c, job_arg);
+ if (ret)
+ ret[job] = job_ret;
+ }
+ return 0;
+}
+
+/* Default (serial) implementation of AVCodecContext.execute2: every job is
+ * run on the calling thread, so the thread number is always 0. */
+int avcodec_default_execute2(AVCodecContext *c, int (*func)(AVCodecContext *c2, void *arg2, int jobnr, int threadnr), void *arg, int *ret, int count)
+{
+ int job;
+
+ for (job = 0; job < count; job++) {
+ int job_ret = func(c, arg, job, 0);
+ if (ret)
+ ret[job] = job_ret;
+ }
+ return 0;
+}
+
+/* Look up a pixel format by fourcc in a table terminated by an entry with
+ * a negative pix_fmt; AV_PIX_FMT_NONE when the fourcc is not found. */
+enum AVPixelFormat avpriv_find_pix_fmt(const PixelFormatTag *tags,
+ unsigned int fourcc)
+{
+ const PixelFormatTag *tag;
+
+ for (tag = tags; tag->pix_fmt >= 0; tag++) {
+ if (tag->fourcc == fourcc)
+ return tag->pix_fmt;
+ }
+ return AV_PIX_FMT_NONE;
+}
+
+/* Nonzero iff the pixel format is a hardware-accelerated one. The caller
+ * must pass a valid pixel format (a descriptor must exist for it). */
+static int is_hwaccel_pix_fmt(enum AVPixelFormat pix_fmt)
+{
+ const AVPixFmtDescriptor *desc;
+
+ desc = av_pix_fmt_desc_get(pix_fmt);
+ return desc->flags & AV_PIX_FMT_FLAG_HWACCEL;
+}
+
+/* Default get_format callback: pick the first non-hwaccel entry of the
+ * AV_PIX_FMT_NONE-terminated list (or the terminator if all are hwaccel). */
+enum AVPixelFormat avcodec_default_get_format(struct AVCodecContext *s, const enum AVPixelFormat *fmt)
+{
+ const enum AVPixelFormat *p = fmt;
+
+ while (*p != AV_PIX_FMT_NONE && is_hwaccel_pix_fmt(*p))
+ p++;
+ return *p;
+}
+
+/* Walk the registered hwaccel list for an entry matching both the codec id
+ * and the pixel format; NULL when nothing matches. */
+static AVHWAccel *find_hwaccel(enum AVCodecID codec_id,
+ enum AVPixelFormat pix_fmt)
+{
+ AVHWAccel *cursor;
+
+ for (cursor = av_hwaccel_next(NULL); cursor; cursor = av_hwaccel_next(cursor)) {
+ if (cursor->id == codec_id && cursor->pix_fmt == pix_fmt)
+ return cursor;
+ }
+ return NULL;
+}
+
+/* Find, validate and initialize an AVHWAccel for the given pixel format.
+ * On success avctx->hwaccel is set; on failure any allocated hwaccel priv
+ * data is released again.
+ *
+ * Returns 0 on success, AVERROR(ENOENT) when no hwaccel matches,
+ * AVERROR_PATCHWELCOME when an experimental hwaccel is rejected, or a
+ * negative AVERROR propagated from the hwaccel's init(). */
+static int setup_hwaccel(AVCodecContext *avctx,
+ const enum AVPixelFormat fmt,
+ const char *name)
+{
+ AVHWAccel *hwa = find_hwaccel(avctx->codec_id, fmt);
+ int ret = 0;
+
+ if (avctx->active_thread_type & FF_THREAD_FRAME) {
+ av_log(avctx, AV_LOG_WARNING,
+ "Hardware accelerated decoding with frame threading is known to be unstable and its use is discouraged.\n");
+ }
+
+ if (!hwa) {
+ /* Fixed: this message was missing its trailing newline, which glued
+ * it to the following log line. */
+ av_log(avctx, AV_LOG_ERROR,
+ "Could not find an AVHWAccel for the pixel format: %s\n",
+ name);
+ return AVERROR(ENOENT);
+ }
+
+ /* Experimental hwaccels require the user to opt in explicitly. */
+ if (hwa->capabilities & HWACCEL_CODEC_CAP_EXPERIMENTAL &&
+ avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
+ av_log(avctx, AV_LOG_WARNING, "Ignoring experimental hwaccel: %s\n",
+ hwa->name);
+ return AVERROR_PATCHWELCOME;
+ }
+
+ if (hwa->priv_data_size) {
+ avctx->internal->hwaccel_priv_data = av_mallocz(hwa->priv_data_size);
+ if (!avctx->internal->hwaccel_priv_data)
+ return AVERROR(ENOMEM);
+ }
+
+ if (hwa->init) {
+ ret = hwa->init(avctx);
+ if (ret < 0) {
+ av_freep(&avctx->internal->hwaccel_priv_data);
+ return ret;
+ }
+ }
+
+ avctx->hwaccel = hwa;
+
+ return 0;
+}
+
+/* Negotiate the decoding pixel format with the user's get_format callback.
+ * Keeps offering the AV_PIX_FMT_NONE-terminated list, removing each hwaccel
+ * format whose setup fails, until a software format or a working hwaccel
+ * format is chosen. Returns the chosen format or AV_PIX_FMT_NONE. */
+int ff_get_format(AVCodecContext *avctx, const enum AVPixelFormat *fmt)
+{
+ const AVPixFmtDescriptor *desc;
+ enum AVPixelFormat *choices;
+ enum AVPixelFormat ret;
+ unsigned n = 0;
+
+ while (fmt[n] != AV_PIX_FMT_NONE)
+ ++n;
+
+ /* The last entry is by convention the software fallback format. */
+ av_assert0(n >= 1);
+ avctx->sw_pix_fmt = fmt[n - 1];
+ av_assert2(!is_hwaccel_pix_fmt(avctx->sw_pix_fmt));
+
+ /* Work on a mutable copy so failed hwaccels can be removed below. */
+ choices = av_malloc_array(n + 1, sizeof(*choices));
+ if (!choices)
+ return AV_PIX_FMT_NONE;
+
+ memcpy(choices, fmt, (n + 1) * sizeof(*choices));
+
+ for (;;) {
+ /* Tear down any hwaccel left over from a previous iteration. */
+ if (avctx->hwaccel && avctx->hwaccel->uninit)
+ avctx->hwaccel->uninit(avctx);
+ av_freep(&avctx->internal->hwaccel_priv_data);
+ avctx->hwaccel = NULL;
+
+ ret = avctx->get_format(avctx, choices);
+
+ desc = av_pix_fmt_desc_get(ret);
+ if (!desc) {
+ ret = AV_PIX_FMT_NONE;
+ break;
+ }
+
+ if (!(desc->flags & AV_PIX_FMT_FLAG_HWACCEL))
+ break;
+#if FF_API_CAP_VDPAU
+ if (avctx->codec->capabilities&AV_CODEC_CAP_HWACCEL_VDPAU)
+ break;
+#endif
+
+ if (!setup_hwaccel(avctx, ret, desc->name))
+ break;
+
+ /* Remove failed hwaccel from choices */
+ for (n = 0; choices[n] != ret; n++)
+ av_assert0(choices[n] != AV_PIX_FMT_NONE);
+
+ do
+ choices[n] = choices[n + 1];
+ while (choices[n++] != AV_PIX_FMT_NONE);
+ }
+
+ av_freep(&choices);
+ return ret;
+}
+
+/* Expand to av_codec_get_<field>() / av_codec_set_<field>() accessor pairs
+ * for the listed AVCodecContext fields. */
+MAKE_ACCESSORS(AVCodecContext, codec, AVRational, pkt_timebase)
+MAKE_ACCESSORS(AVCodecContext, codec, const AVCodecDescriptor *, codec_descriptor)
+MAKE_ACCESSORS(AVCodecContext, codec, int, lowres)
+MAKE_ACCESSORS(AVCodecContext, codec, int, seek_preroll)
+MAKE_ACCESSORS(AVCodecContext, codec, uint16_t*, chroma_intra_matrix)
+
+/* Plain getter for AVCodecContext.properties. */
+unsigned av_codec_get_codec_properties(const AVCodecContext *codec)
+{
+ const unsigned props = codec->properties;
+ return props;
+}
+
+/* Plain getter for AVCodec.max_lowres. */
+int av_codec_get_max_lowres(const AVCodec *codec)
+{
+ const int max_lowres = codec->max_lowres;
+ return max_lowres;
+}
+
+/* Report (0/1) whether the codec fills frame parameters even for frames
+ * it was told to skip. */
+int avpriv_codec_get_cap_skip_frame_fill_param(const AVCodec *codec){
+ if (codec->caps_internal & FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM)
+ return 1;
+ return 0;
+}
+
+/* Reset an AVSubtitle to its default state: all fields zeroed, with the
+ * pts explicitly marked as unset. */
+static void get_subtitle_defaults(AVSubtitle *sub)
+{
+ memset(sub, 0, sizeof(*sub));
+ sub->pts = AV_NOPTS_VALUE;
+}
+
+/* Derive the context's bit rate. For constant-bits-per-sample audio codecs
+ * the rate is computed from sample rate * channels * bits-per-sample;
+ * otherwise the context's bit_rate field is used (0 for unknown types). */
+static int64_t get_bit_rate(AVCodecContext *ctx)
+{
+ if (ctx->codec_type == AVMEDIA_TYPE_AUDIO) {
+ int bps = av_get_bits_per_sample(ctx->codec_id);
+ if (bps)
+ return (int64_t)ctx->sample_rate * ctx->channels * bps;
+ return ctx->bit_rate;
+ }
+
+ if (ctx->codec_type == AVMEDIA_TYPE_VIDEO ||
+ ctx->codec_type == AVMEDIA_TYPE_DATA ||
+ ctx->codec_type == AVMEDIA_TYPE_SUBTITLE ||
+ ctx->codec_type == AVMEDIA_TYPE_ATTACHMENT)
+ return ctx->bit_rate;
+
+ return 0;
+}
+
+/* Open a codec from within another codec's init() while the global avcodec
+ * lock is already held: temporarily release the lock so the nested
+ * avcodec_open2() can acquire it, then re-acquire it before returning. */
+int attribute_align_arg ff_codec_open2_recursive(AVCodecContext *avctx, const AVCodec *codec, AVDictionary **options)
+{
+ int ret = 0;
+
+ ff_unlock_avcodec(codec);
+
+ ret = avcodec_open2(avctx, codec, options);
+
+ ff_lock_avcodec(avctx, codec);
+ return ret;
+}
+
+/**
+ * Initialize the AVCodecContext to use the given AVCodec.
+ *
+ * Allocates the internal state, applies and validates all options and
+ * stream parameters (dimensions, SAR, sample/pixel formats, channel
+ * layouts), sets up threading, and finally calls the codec's init().
+ *
+ * @param avctx   context to open; a no-op if it is already open
+ * @param codec   codec to use; may be NULL if avctx->codec is already set
+ * @param options options to apply; on return holds the unrecognized ones
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *codec, AVDictionary **options)
+{
+ int ret = 0;
+ AVDictionary *tmp = NULL;
+ const AVPixFmtDescriptor *pixdesc;
+
+ if (avcodec_is_open(avctx))
+ return 0;
+
+ if ((!codec && !avctx->codec)) {
+ av_log(avctx, AV_LOG_ERROR, "No codec provided to avcodec_open2()\n");
+ return AVERROR(EINVAL);
+ }
+ if ((codec && avctx->codec && codec != avctx->codec)) {
+ av_log(avctx, AV_LOG_ERROR, "This AVCodecContext was allocated for %s, "
+ "but %s passed to avcodec_open2()\n", avctx->codec->name, codec->name);
+ return AVERROR(EINVAL);
+ }
+ if (!codec)
+ codec = avctx->codec;
+
+ if (avctx->extradata_size < 0 || avctx->extradata_size >= FF_MAX_EXTRADATA_SIZE)
+ return AVERROR(EINVAL);
+
+ /* Work on a copy so the caller's dictionary only receives the leftover
+ * (unrecognized) options back at "end:". */
+ if (options)
+ av_dict_copy(&tmp, *options, 0);
+
+ ret = ff_lock_avcodec(avctx, codec);
+ if (ret < 0)
+ return ret;
+
+ avctx->internal = av_mallocz(sizeof(AVCodecInternal));
+ if (!avctx->internal) {
+ ret = AVERROR(ENOMEM);
+ goto end;
+ }
+
+ avctx->internal->pool = av_mallocz(sizeof(*avctx->internal->pool));
+ if (!avctx->internal->pool) {
+ ret = AVERROR(ENOMEM);
+ goto free_and_end;
+ }
+
+ avctx->internal->to_free = av_frame_alloc();
+ if (!avctx->internal->to_free) {
+ ret = AVERROR(ENOMEM);
+ goto free_and_end;
+ }
+
+ avctx->internal->buffer_frame = av_frame_alloc();
+ if (!avctx->internal->buffer_frame) {
+ ret = AVERROR(ENOMEM);
+ goto free_and_end;
+ }
+
+ avctx->internal->buffer_pkt = av_packet_alloc();
+ if (!avctx->internal->buffer_pkt) {
+ ret = AVERROR(ENOMEM);
+ goto free_and_end;
+ }
+
+ if (codec->priv_data_size > 0) {
+ if (!avctx->priv_data) {
+ avctx->priv_data = av_mallocz(codec->priv_data_size);
+ if (!avctx->priv_data) {
+ ret = AVERROR(ENOMEM);
+ /* NOTE(review): skips free_and_end, leaving the internal
+ * buffers above to avcodec_close() on the caller's side —
+ * confirm callers always close on error. */
+ goto end;
+ }
+ if (codec->priv_class) {
+ *(const AVClass **)avctx->priv_data = codec->priv_class;
+ av_opt_set_defaults(avctx->priv_data);
+ }
+ }
+ if (codec->priv_class && (ret = av_opt_set_dict(avctx->priv_data, &tmp)) < 0)
+ goto free_and_end;
+ } else {
+ avctx->priv_data = NULL;
+ }
+ if ((ret = av_opt_set_dict(avctx, &tmp)) < 0)
+ goto free_and_end;
+
+ if (avctx->codec_whitelist && av_match_list(codec->name, avctx->codec_whitelist, ',') <= 0) {
+ av_log(avctx, AV_LOG_ERROR, "Codec (%s) not on whitelist \'%s\'\n", codec->name, avctx->codec_whitelist);
+ ret = AVERROR(EINVAL);
+ goto free_and_end;
+ }
+
+ // only call ff_set_dimensions() for non H.264/VP6F/DXV codecs so as not to overwrite previously setup dimensions
+ if (!(avctx->coded_width && avctx->coded_height && avctx->width && avctx->height &&
+ (avctx->codec_id == AV_CODEC_ID_H264 || avctx->codec_id == AV_CODEC_ID_VP6F || avctx->codec_id == AV_CODEC_ID_DXV))) {
+ if (avctx->coded_width && avctx->coded_height)
+ ret = ff_set_dimensions(avctx, avctx->coded_width, avctx->coded_height);
+ else if (avctx->width && avctx->height)
+ ret = ff_set_dimensions(avctx, avctx->width, avctx->height);
+ if (ret < 0)
+ goto free_and_end;
+ }
+
+ /* Invalid dimensions are not fatal here; they are reset to 0 and the
+ * codec may recover them from the bitstream later. */
+ if ((avctx->coded_width || avctx->coded_height || avctx->width || avctx->height)
+ && ( av_image_check_size(avctx->coded_width, avctx->coded_height, 0, avctx) < 0
+ || av_image_check_size(avctx->width, avctx->height, 0, avctx) < 0)) {
+ av_log(avctx, AV_LOG_WARNING, "Ignoring invalid width/height values\n");
+ ff_set_dimensions(avctx, 0, 0);
+ }
+
+ if (avctx->width > 0 && avctx->height > 0) {
+ if (av_image_check_sar(avctx->width, avctx->height,
+ avctx->sample_aspect_ratio) < 0) {
+ av_log(avctx, AV_LOG_WARNING, "ignoring invalid SAR: %u/%u\n",
+ avctx->sample_aspect_ratio.num,
+ avctx->sample_aspect_ratio.den);
+ avctx->sample_aspect_ratio = (AVRational){ 0, 1 };
+ }
+ }
+
+ /* if the decoder init function was already called previously,
+ * free the already allocated subtitle_header before overwriting it */
+ if (av_codec_is_decoder(codec))
+ av_freep(&avctx->subtitle_header);
+
+ if (avctx->channels > FF_SANE_NB_CHANNELS) {
+ ret = AVERROR(EINVAL);
+ goto free_and_end;
+ }
+
+ avctx->codec = codec;
+ if ((avctx->codec_type == AVMEDIA_TYPE_UNKNOWN || avctx->codec_type == codec->type) &&
+ avctx->codec_id == AV_CODEC_ID_NONE) {
+ avctx->codec_type = codec->type;
+ avctx->codec_id = codec->id;
+ }
+ if (avctx->codec_id != codec->id || (avctx->codec_type != codec->type
+ && avctx->codec_type != AVMEDIA_TYPE_ATTACHMENT)) {
+ av_log(avctx, AV_LOG_ERROR, "Codec type or id mismatches\n");
+ ret = AVERROR(EINVAL);
+ goto free_and_end;
+ }
+ avctx->frame_number = 0;
+ avctx->codec_descriptor = avcodec_descriptor_get(avctx->codec_id);
+
+ if ((avctx->codec->capabilities & AV_CODEC_CAP_EXPERIMENTAL) &&
+ avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
+ const char *codec_string = av_codec_is_encoder(codec) ? "encoder" : "decoder";
+ AVCodec *codec2;
+ av_log(avctx, AV_LOG_ERROR,
+ "The %s '%s' is experimental but experimental codecs are not enabled, "
+ "add '-strict %d' if you want to use it.\n",
+ codec_string, codec->name, FF_COMPLIANCE_EXPERIMENTAL);
+ codec2 = av_codec_is_encoder(codec) ? avcodec_find_encoder(codec->id) : avcodec_find_decoder(codec->id);
+ if (!(codec2->capabilities & AV_CODEC_CAP_EXPERIMENTAL))
+ av_log(avctx, AV_LOG_ERROR, "Alternatively use the non experimental %s '%s'.\n",
+ codec_string, codec2->name);
+ ret = AVERROR_EXPERIMENTAL;
+ goto free_and_end;
+ }
+
+ if (avctx->codec_type == AVMEDIA_TYPE_AUDIO &&
+ (!avctx->time_base.num || !avctx->time_base.den)) {
+ avctx->time_base.num = 1;
+ avctx->time_base.den = avctx->sample_rate;
+ }
+
+ if (!HAVE_THREADS)
+ av_log(avctx, AV_LOG_WARNING, "Warning: not compiled with thread support, using thread emulation\n");
+
+ if (CONFIG_FRAME_THREAD_ENCODER && av_codec_is_encoder(avctx->codec)) {
+ ff_unlock_avcodec(codec); //we will instantiate a few encoders thus kick the counter to prevent false detection of a problem
+ ret = ff_frame_thread_encoder_init(avctx, options ? *options : NULL);
+ ff_lock_avcodec(avctx, codec);
+ if (ret < 0)
+ goto free_and_end;
+ }
+
+ if (HAVE_THREADS
+ && !(avctx->internal->frame_thread_encoder && (avctx->active_thread_type&FF_THREAD_FRAME))) {
+ ret = ff_thread_init(avctx);
+ if (ret < 0) {
+ goto free_and_end;
+ }
+ }
+ if (!HAVE_THREADS && !(codec->capabilities & AV_CODEC_CAP_AUTO_THREADS))
+ avctx->thread_count = 1;
+
+ if (avctx->codec->max_lowres < avctx->lowres || avctx->lowres < 0) {
+ av_log(avctx, AV_LOG_ERROR, "The maximum value for lowres supported by the decoder is %d\n",
+ avctx->codec->max_lowres);
+ ret = AVERROR(EINVAL);
+ goto free_and_end;
+ }
+
+#if FF_API_VISMV
+ if (avctx->debug_mv)
+ av_log(avctx, AV_LOG_WARNING, "The 'vismv' option is deprecated, "
+ "see the codecview filter instead.\n");
+#endif
+
+ /* Encoder-only validation of the user-supplied stream parameters. */
+ if (av_codec_is_encoder(avctx->codec)) {
+ int i;
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
+ avctx->coded_frame = av_frame_alloc();
+ if (!avctx->coded_frame) {
+ ret = AVERROR(ENOMEM);
+ goto free_and_end;
+ }
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+ if (avctx->time_base.num <= 0 || avctx->time_base.den <= 0) {
+ av_log(avctx, AV_LOG_ERROR, "The encoder timebase is not set.\n");
+ ret = AVERROR(EINVAL);
+ goto free_and_end;
+ }
+
+ if (avctx->codec->sample_fmts) {
+ for (i = 0; avctx->codec->sample_fmts[i] != AV_SAMPLE_FMT_NONE; i++) {
+ if (avctx->sample_fmt == avctx->codec->sample_fmts[i])
+ break;
+ /* For mono, planar and packed layouts are interchangeable. */
+ if (avctx->channels == 1 &&
+ av_get_planar_sample_fmt(avctx->sample_fmt) ==
+ av_get_planar_sample_fmt(avctx->codec->sample_fmts[i])) {
+ avctx->sample_fmt = avctx->codec->sample_fmts[i];
+ break;
+ }
+ }
+ if (avctx->codec->sample_fmts[i] == AV_SAMPLE_FMT_NONE) {
+ char buf[128];
+ snprintf(buf, sizeof(buf), "%d", avctx->sample_fmt);
+ av_log(avctx, AV_LOG_ERROR, "Specified sample format %s is invalid or not supported\n",
+ (char *)av_x_if_null(av_get_sample_fmt_name(avctx->sample_fmt), buf));
+ ret = AVERROR(EINVAL);
+ goto free_and_end;
+ }
+ }
+ if (avctx->codec->pix_fmts) {
+ for (i = 0; avctx->codec->pix_fmts[i] != AV_PIX_FMT_NONE; i++)
+ if (avctx->pix_fmt == avctx->codec->pix_fmts[i])
+ break;
+ if (avctx->codec->pix_fmts[i] == AV_PIX_FMT_NONE
+ && !((avctx->codec_id == AV_CODEC_ID_MJPEG || avctx->codec_id == AV_CODEC_ID_LJPEG)
+ && avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL)) {
+ char buf[128];
+ snprintf(buf, sizeof(buf), "%d", avctx->pix_fmt);
+ av_log(avctx, AV_LOG_ERROR, "Specified pixel format %s is invalid or not supported\n",
+ (char *)av_x_if_null(av_get_pix_fmt_name(avctx->pix_fmt), buf));
+ ret = AVERROR(EINVAL);
+ goto free_and_end;
+ }
+ /* YUVJ formats imply full-range video. */
+ if (avctx->codec->pix_fmts[i] == AV_PIX_FMT_YUVJ420P ||
+ avctx->codec->pix_fmts[i] == AV_PIX_FMT_YUVJ411P ||
+ avctx->codec->pix_fmts[i] == AV_PIX_FMT_YUVJ422P ||
+ avctx->codec->pix_fmts[i] == AV_PIX_FMT_YUVJ440P ||
+ avctx->codec->pix_fmts[i] == AV_PIX_FMT_YUVJ444P)
+ avctx->color_range = AVCOL_RANGE_JPEG;
+ }
+ if (avctx->codec->supported_samplerates) {
+ for (i = 0; avctx->codec->supported_samplerates[i] != 0; i++)
+ if (avctx->sample_rate == avctx->codec->supported_samplerates[i])
+ break;
+ if (avctx->codec->supported_samplerates[i] == 0) {
+ av_log(avctx, AV_LOG_ERROR, "Specified sample rate %d is not supported\n",
+ avctx->sample_rate);
+ ret = AVERROR(EINVAL);
+ goto free_and_end;
+ }
+ }
+ if (avctx->sample_rate < 0) {
+ av_log(avctx, AV_LOG_ERROR, "Specified sample rate %d is not supported\n",
+ avctx->sample_rate);
+ ret = AVERROR(EINVAL);
+ goto free_and_end;
+ }
+ if (avctx->codec->channel_layouts) {
+ if (!avctx->channel_layout) {
+ av_log(avctx, AV_LOG_WARNING, "Channel layout not specified\n");
+ } else {
+ for (i = 0; avctx->codec->channel_layouts[i] != 0; i++)
+ if (avctx->channel_layout == avctx->codec->channel_layouts[i])
+ break;
+ if (avctx->codec->channel_layouts[i] == 0) {
+ char buf[512];
+ av_get_channel_layout_string(buf, sizeof(buf), -1, avctx->channel_layout);
+ av_log(avctx, AV_LOG_ERROR, "Specified channel layout '%s' is not supported\n", buf);
+ ret = AVERROR(EINVAL);
+ goto free_and_end;
+ }
+ }
+ }
+ if (avctx->channel_layout && avctx->channels) {
+ int channels = av_get_channel_layout_nb_channels(avctx->channel_layout);
+ if (channels != avctx->channels) {
+ char buf[512];
+ av_get_channel_layout_string(buf, sizeof(buf), -1, avctx->channel_layout);
+ av_log(avctx, AV_LOG_ERROR,
+ "Channel layout '%s' with %d channels does not match number of specified channels %d\n",
+ buf, channels, avctx->channels);
+ ret = AVERROR(EINVAL);
+ goto free_and_end;
+ }
+ } else if (avctx->channel_layout) {
+ avctx->channels = av_get_channel_layout_nb_channels(avctx->channel_layout);
+ }
+ if (avctx->channels < 0) {
+ av_log(avctx, AV_LOG_ERROR, "Specified number of channels %d is not supported\n",
+ avctx->channels);
+ ret = AVERROR(EINVAL);
+ goto free_and_end;
+ }
+ if(avctx->codec_type == AVMEDIA_TYPE_VIDEO) {
+ pixdesc = av_pix_fmt_desc_get(avctx->pix_fmt);
+ if ( avctx->bits_per_raw_sample < 0
+ || (avctx->bits_per_raw_sample > 8 && pixdesc->comp[0].depth <= 8)) {
+ av_log(avctx, AV_LOG_WARNING, "Specified bit depth %d not possible with the specified pixel formats depth %d\n",
+ avctx->bits_per_raw_sample, pixdesc->comp[0].depth);
+ avctx->bits_per_raw_sample = pixdesc->comp[0].depth;
+ }
+ if (avctx->width <= 0 || avctx->height <= 0) {
+ av_log(avctx, AV_LOG_ERROR, "dimensions not set\n");
+ ret = AVERROR(EINVAL);
+ goto free_and_end;
+ }
+ }
+ /* The same value is printed twice on purpose: an input of e.g. 128
+ * most likely meant 128k. */
+ if ( (avctx->codec_type == AVMEDIA_TYPE_VIDEO || avctx->codec_type == AVMEDIA_TYPE_AUDIO)
+ && avctx->bit_rate>0 && avctx->bit_rate<1000) {
+ av_log(avctx, AV_LOG_WARNING, "Bitrate %"PRId64" is extremely low, maybe you mean %"PRId64"k\n", (int64_t)avctx->bit_rate, (int64_t)avctx->bit_rate);
+ }
+
+ if (!avctx->rc_initial_buffer_occupancy)
+ avctx->rc_initial_buffer_occupancy = avctx->rc_buffer_size * 3 / 4;
+
+ if (avctx->ticks_per_frame && avctx->time_base.num &&
+ avctx->ticks_per_frame > INT_MAX / avctx->time_base.num) {
+ av_log(avctx, AV_LOG_ERROR,
+ "ticks_per_frame %d too large for the timebase %d/%d.",
+ avctx->ticks_per_frame,
+ avctx->time_base.num,
+ avctx->time_base.den);
+ /* Fixed: this error path previously jumped with ret still 0, so
+ * the failed open could be reported as success. */
+ ret = AVERROR(EINVAL);
+ goto free_and_end;
+ }
+
+ if (avctx->hw_frames_ctx) {
+ AVHWFramesContext *frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
+ if (frames_ctx->format != avctx->pix_fmt) {
+ av_log(avctx, AV_LOG_ERROR,
+ "Mismatching AVCodecContext.pix_fmt and AVHWFramesContext.format\n");
+ ret = AVERROR(EINVAL);
+ goto free_and_end;
+ }
+ }
+ }
+
+ avctx->pts_correction_num_faulty_pts =
+ avctx->pts_correction_num_faulty_dts = 0;
+ avctx->pts_correction_last_pts =
+ avctx->pts_correction_last_dts = INT64_MIN;
+
+ if ( !CONFIG_GRAY && avctx->flags & AV_CODEC_FLAG_GRAY
+ && avctx->codec_descriptor->type == AVMEDIA_TYPE_VIDEO)
+ av_log(avctx, AV_LOG_WARNING,
+ "gray decoding requested but not enabled at configuration time\n");
+
+ /* With frame threading the per-thread contexts run init() themselves. */
+ if ( avctx->codec->init && (!(avctx->active_thread_type&FF_THREAD_FRAME)
+ || avctx->internal->frame_thread_encoder)) {
+ ret = avctx->codec->init(avctx);
+ if (ret < 0) {
+ goto free_and_end;
+ }
+ }
+
+ ret=0;
+
+#if FF_API_AUDIOENC_DELAY
+ if (av_codec_is_encoder(avctx->codec))
+ avctx->delay = avctx->initial_padding;
+#endif
+
+ if (av_codec_is_decoder(avctx->codec)) {
+ if (!avctx->bit_rate)
+ avctx->bit_rate = get_bit_rate(avctx);
+ /* validate channel layout from the decoder */
+ if (avctx->channel_layout) {
+ int channels = av_get_channel_layout_nb_channels(avctx->channel_layout);
+ if (!avctx->channels)
+ avctx->channels = channels;
+ else if (channels != avctx->channels) {
+ char buf[512];
+ av_get_channel_layout_string(buf, sizeof(buf), -1, avctx->channel_layout);
+ av_log(avctx, AV_LOG_WARNING,
+ "Channel layout '%s' with %d channels does not match specified number of channels %d: "
+ "ignoring specified channel layout\n",
+ buf, channels, avctx->channels);
+ avctx->channel_layout = 0;
+ }
+ }
+ if ((avctx->channels && avctx->channels < 0) ||
+ avctx->channels > FF_SANE_NB_CHANNELS) {
+ ret = AVERROR(EINVAL);
+ goto free_and_end;
+ }
+ if (avctx->sub_charenc) {
+ if (avctx->codec_type != AVMEDIA_TYPE_SUBTITLE) {
+ av_log(avctx, AV_LOG_ERROR, "Character encoding is only "
+ "supported with subtitles codecs\n");
+ ret = AVERROR(EINVAL);
+ goto free_and_end;
+ } else if (avctx->codec_descriptor->props & AV_CODEC_PROP_BITMAP_SUB) {
+ av_log(avctx, AV_LOG_WARNING, "Codec '%s' is bitmap-based, "
+ "subtitles character encoding will be ignored\n",
+ avctx->codec_descriptor->name);
+ avctx->sub_charenc_mode = FF_SUB_CHARENC_MODE_DO_NOTHING;
+ } else {
+ /* input character encoding is set for a text based subtitle
+ * codec at this point */
+ if (avctx->sub_charenc_mode == FF_SUB_CHARENC_MODE_AUTOMATIC)
+ avctx->sub_charenc_mode = FF_SUB_CHARENC_MODE_PRE_DECODER;
+
+ if (avctx->sub_charenc_mode == FF_SUB_CHARENC_MODE_PRE_DECODER) {
+#if CONFIG_ICONV
+ iconv_t cd = iconv_open("UTF-8", avctx->sub_charenc);
+ if (cd == (iconv_t)-1) {
+ ret = AVERROR(errno);
+ av_log(avctx, AV_LOG_ERROR, "Unable to open iconv context "
+ "with input character encoding \"%s\"\n", avctx->sub_charenc);
+ goto free_and_end;
+ }
+ iconv_close(cd);
+#else
+ av_log(avctx, AV_LOG_ERROR, "Character encoding subtitles "
+ "conversion needs a libavcodec built with iconv support "
+ "for this codec\n");
+ ret = AVERROR(ENOSYS);
+ goto free_and_end;
+#endif
+ }
+ }
+ }
+
+#if FF_API_AVCTX_TIMEBASE
+ if (avctx->framerate.num > 0 && avctx->framerate.den > 0)
+ avctx->time_base = av_inv_q(av_mul_q(avctx->framerate, (AVRational){avctx->ticks_per_frame, 1}));
+#endif
+ }
+ if (codec->priv_data_size > 0 && avctx->priv_data && codec->priv_class) {
+ av_assert0(*(const AVClass **)avctx->priv_data == codec->priv_class);
+ }
+
+end:
+ ff_unlock_avcodec(codec);
+ if (options) {
+ av_dict_free(options);
+ *options = tmp;
+ }
+
+ return ret;
+free_and_end:
+ if (avctx->codec &&
+ (avctx->codec->caps_internal & FF_CODEC_CAP_INIT_CLEANUP))
+ avctx->codec->close(avctx);
+
+ if (codec->priv_class && codec->priv_data_size)
+ av_opt_free(avctx->priv_data);
+ av_opt_free(avctx);
+
+#if FF_API_CODED_FRAME
+FF_DISABLE_DEPRECATION_WARNINGS
+ av_frame_free(&avctx->coded_frame);
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+ av_dict_free(&tmp);
+ av_freep(&avctx->priv_data);
+ if (avctx->internal) {
+ av_packet_free(&avctx->internal->buffer_pkt);
+ av_frame_free(&avctx->internal->buffer_frame);
+ av_frame_free(&avctx->internal->to_free);
+ av_freep(&avctx->internal->pool);
+ }
+ av_freep(&avctx->internal);
+ avctx->codec = NULL;
+ goto end;
+}
+
+/* Ensure avpkt has at least 'size' bytes of data available for an encoder.
+ * Small packets (size <= 2*min_size) reuse the context's internal
+ * byte_buffer to avoid per-packet allocations; a user-supplied buffer is
+ * validated and kept; otherwise a fresh packet is allocated.
+ * Returns 0 on success, a negative AVERROR on failure. */
+int ff_alloc_packet2(AVCodecContext *avctx, AVPacket *avpkt, int64_t size, int64_t min_size)
+{
+ if (avpkt->size < 0) {
+ av_log(avctx, AV_LOG_ERROR, "Invalid negative user packet size %d\n", avpkt->size);
+ return AVERROR(EINVAL);
+ }
+ if (size < 0 || size > INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE) {
+ av_log(avctx, AV_LOG_ERROR, "Invalid minimum required packet size %"PRId64" (max allowed is %d)\n",
+ size, INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE);
+ return AVERROR(EINVAL);
+ }
+
+ if (avctx && 2*min_size < size) { // FIXME The factor needs to be finetuned
+ /* Point the packet at the shared internal buffer; the caller is
+ * expected to copy/dup the data out before returning it. */
+ av_assert0(!avpkt->data || avpkt->data != avctx->internal->byte_buffer);
+ if (!avpkt->data || avpkt->size < size) {
+ av_fast_padded_malloc(&avctx->internal->byte_buffer, &avctx->internal->byte_buffer_size, size);
+ avpkt->data = avctx->internal->byte_buffer;
+ avpkt->size = avctx->internal->byte_buffer_size;
+ }
+ }
+
+ if (avpkt->data) {
+ AVBufferRef *buf = avpkt->buf;
+
+ if (avpkt->size < size) {
+ av_log(avctx, AV_LOG_ERROR, "User packet is too small (%d < %"PRId64")\n", avpkt->size, size);
+ return AVERROR(EINVAL);
+ }
+
+ /* Reset all packet fields but keep the existing data/buf. */
+ av_init_packet(avpkt);
+ avpkt->buf = buf;
+ avpkt->size = size;
+ return 0;
+ } else {
+ int ret = av_new_packet(avpkt, size);
+ if (ret < 0)
+ av_log(avctx, AV_LOG_ERROR, "Failed to allocate packet of size %"PRId64"\n", size);
+ return ret;
+ }
+}
+
+/* Convenience wrapper around ff_alloc_packet2() with no context (so no
+ * internal-buffer reuse and no logging) and no minimum size. */
+int ff_alloc_packet(AVPacket *avpkt, int size)
+{
+ return ff_alloc_packet2(NULL, avpkt, size, 0);
+}
+
+/**
+ * Pad last frame with silence.
+ *
+ * Allocates a new frame of exactly s->frame_size samples, copies the
+ * samples and properties from src, and fills the remainder with silence.
+ * On success *dst owns the new frame; on failure *dst is untouched and a
+ * negative AVERROR is returned.
+ */
+static int pad_last_frame(AVCodecContext *s, AVFrame **dst, const AVFrame *src)
+{
+ AVFrame *frame = NULL;
+ int ret;
+
+ if (!(frame = av_frame_alloc()))
+ return AVERROR(ENOMEM);
+
+ frame->format = src->format;
+ frame->channel_layout = src->channel_layout;
+ av_frame_set_channels(frame, av_frame_get_channels(src));
+ /* Full encoder frame size, not src->nb_samples. */
+ frame->nb_samples = s->frame_size;
+ ret = av_frame_get_buffer(frame, 32);
+ if (ret < 0)
+ goto fail;
+
+ ret = av_frame_copy_props(frame, src);
+ if (ret < 0)
+ goto fail;
+
+ if ((ret = av_samples_copy(frame->extended_data, src->extended_data, 0, 0,
+ src->nb_samples, s->channels, s->sample_fmt)) < 0)
+ goto fail;
+ if ((ret = av_samples_set_silence(frame->extended_data, src->nb_samples,
+ frame->nb_samples - src->nb_samples,
+ s->channels, s->sample_fmt)) < 0)
+ goto fail;
+
+ *dst = frame;
+
+ return 0;
+
+fail:
+ av_frame_free(&frame);
+ return ret;
+}
+
+/* Encode one audio frame into a packet via the codec's encode2() callback.
+ * Handles missing extended_data, padding of the final short frame for
+ * fixed-frame-size codecs, the internal byte_buffer optimization, and
+ * default pts/duration. Returns 0 on success, negative AVERROR on error;
+ * *got_packet_ptr tells whether avpkt holds output. */
+int attribute_align_arg avcodec_encode_audio2(AVCodecContext *avctx,
+ AVPacket *avpkt,
+ const AVFrame *frame,
+ int *got_packet_ptr)
+{
+ AVFrame *extended_frame = NULL;
+ AVFrame *padded_frame = NULL;
+ int ret;
+ AVPacket user_pkt = *avpkt;
+ int needs_realloc = !user_pkt.data;
+
+ *got_packet_ptr = 0;
+
+ if (!avctx->codec->encode2) {
+ av_log(avctx, AV_LOG_ERROR, "This encoder requires using the avcodec_send_frame() API.\n");
+ return AVERROR(ENOSYS);
+ }
+
+ /* NULL frame means flush; nothing to do unless the codec has delay. */
+ if (!(avctx->codec->capabilities & AV_CODEC_CAP_DELAY) && !frame) {
+ av_packet_unref(avpkt);
+ av_init_packet(avpkt);
+ return 0;
+ }
+
+ /* ensure that extended_data is properly set */
+ if (frame && !frame->extended_data) {
+ if (av_sample_fmt_is_planar(avctx->sample_fmt) &&
+ avctx->channels > AV_NUM_DATA_POINTERS) {
+ av_log(avctx, AV_LOG_ERROR, "Encoding to a planar sample format, "
+ "with more than %d channels, but extended_data is not set.\n",
+ AV_NUM_DATA_POINTERS);
+ return AVERROR(EINVAL);
+ }
+ av_log(avctx, AV_LOG_WARNING, "extended_data is not set.\n");
+
+ /* Shallow-copy into a scratch frame so extended_data can be pointed
+ * at data[] without modifying the caller's const frame. */
+ extended_frame = av_frame_alloc();
+ if (!extended_frame)
+ return AVERROR(ENOMEM);
+
+ memcpy(extended_frame, frame, sizeof(AVFrame));
+ extended_frame->extended_data = extended_frame->data;
+ frame = extended_frame;
+ }
+
+ /* extract audio service type metadata */
+ if (frame) {
+ AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_AUDIO_SERVICE_TYPE);
+ if (sd && sd->size >= sizeof(enum AVAudioServiceType))
+ avctx->audio_service_type = *(enum AVAudioServiceType*)sd->data;
+ }
+
+ /* check for valid frame size */
+ if (frame) {
+ if (avctx->codec->capabilities & AV_CODEC_CAP_SMALL_LAST_FRAME) {
+ if (frame->nb_samples > avctx->frame_size) {
+ av_log(avctx, AV_LOG_ERROR, "more samples than frame size (avcodec_encode_audio2)\n");
+ ret = AVERROR(EINVAL);
+ goto end;
+ }
+ } else if (!(avctx->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE)) {
+ /* Fixed-frame-size codec: pad the final short frame with silence. */
+ if (frame->nb_samples < avctx->frame_size &&
+ !avctx->internal->last_audio_frame) {
+ ret = pad_last_frame(avctx, &padded_frame, frame);
+ if (ret < 0)
+ goto end;
+
+ frame = padded_frame;
+ avctx->internal->last_audio_frame = 1;
+ }
+
+ if (frame->nb_samples != avctx->frame_size) {
+ av_log(avctx, AV_LOG_ERROR, "nb_samples (%d) != frame_size (%d) (avcodec_encode_audio2)\n", frame->nb_samples, avctx->frame_size);
+ ret = AVERROR(EINVAL);
+ goto end;
+ }
+ }
+ }
+
+ av_assert0(avctx->codec->encode2);
+
+ ret = avctx->codec->encode2(avctx, avpkt, frame, got_packet_ptr);
+ if (!ret) {
+ if (*got_packet_ptr) {
+ if (!(avctx->codec->capabilities & AV_CODEC_CAP_DELAY)) {
+ if (avpkt->pts == AV_NOPTS_VALUE)
+ avpkt->pts = frame->pts;
+ if (!avpkt->duration)
+ avpkt->duration = ff_samples_to_time_base(avctx,
+ frame->nb_samples);
+ }
+ avpkt->dts = avpkt->pts;
+ } else {
+ avpkt->size = 0;
+ }
+ }
+ /* The encoder wrote into the shared internal byte_buffer: copy the data
+ * into the user's buffer, or dup it, so avpkt never aliases it. */
+ if (avpkt->data && avpkt->data == avctx->internal->byte_buffer) {
+ needs_realloc = 0;
+ if (user_pkt.data) {
+ if (user_pkt.size >= avpkt->size) {
+ memcpy(user_pkt.data, avpkt->data, avpkt->size);
+ } else {
+ av_log(avctx, AV_LOG_ERROR, "Provided packet is too small, needs to be %d\n", avpkt->size);
+ avpkt->size = user_pkt.size;
+ ret = -1;
+ }
+ avpkt->buf = user_pkt.buf;
+ avpkt->data = user_pkt.data;
+ } else {
+ if (av_dup_packet(avpkt) < 0) {
+ ret = AVERROR(ENOMEM);
+ }
+ }
+ }
+
+ if (!ret) {
+ if (needs_realloc && avpkt->data) {
+ /* Shrink/grow the allocation to the actual size plus padding. */
+ ret = av_buffer_realloc(&avpkt->buf, avpkt->size + AV_INPUT_BUFFER_PADDING_SIZE);
+ if (ret >= 0)
+ avpkt->data = avpkt->buf->data;
+ }
+
+ avctx->frame_number++;
+ }
+
+ if (ret < 0 || !*got_packet_ptr) {
+ av_packet_unref(avpkt);
+ av_init_packet(avpkt);
+ goto end;
+ }
+
+ /* NOTE: if we add any audio encoders which output non-keyframe packets,
+ * this needs to be moved to the encoders, but for now we can do it
+ * here to simplify things */
+ avpkt->flags |= AV_PKT_FLAG_KEY;
+
+end:
+ av_frame_free(&padded_frame);
+ av_free(extended_frame);
+
+#if FF_API_AUDIOENC_DELAY
+ avctx->delay = avctx->initial_padding;
+#endif
+
+ return ret;
+}
+
+/* Encode one video frame into a packet via the codec's encode2() callback
+ * (or the frame-threaded encoder wrapper). Handles the internal
+ * byte_buffer optimization and default timestamps. Returns 0 on success,
+ * negative AVERROR on error; *got_packet_ptr tells whether avpkt holds
+ * output. */
+int attribute_align_arg avcodec_encode_video2(AVCodecContext *avctx,
+ AVPacket *avpkt,
+ const AVFrame *frame,
+ int *got_packet_ptr)
+{
+ int ret;
+ AVPacket user_pkt = *avpkt;
+ int needs_realloc = !user_pkt.data;
+
+ *got_packet_ptr = 0;
+
+ if (!avctx->codec->encode2) {
+ av_log(avctx, AV_LOG_ERROR, "This encoder requires using the avcodec_send_frame() API.\n");
+ return AVERROR(ENOSYS);
+ }
+
+ /* Frame-threaded encoding goes through its own dispatcher. */
+ if(CONFIG_FRAME_THREAD_ENCODER &&
+ avctx->internal->frame_thread_encoder && (avctx->active_thread_type&FF_THREAD_FRAME))
+ return ff_thread_video_encode_frame(avctx, avpkt, frame, got_packet_ptr);
+
+ if ((avctx->flags&AV_CODEC_FLAG_PASS1) && avctx->stats_out)
+ avctx->stats_out[0] = '\0';
+
+ /* NULL frame means flush; nothing to do unless the codec has delay. */
+ if (!(avctx->codec->capabilities & AV_CODEC_CAP_DELAY) && !frame) {
+ av_packet_unref(avpkt);
+ av_init_packet(avpkt);
+ avpkt->size = 0;
+ return 0;
+ }
+
+ if (av_image_check_size(avctx->width, avctx->height, 0, avctx))
+ return AVERROR(EINVAL);
+
+ if (frame && frame->format == AV_PIX_FMT_NONE)
+ av_log(avctx, AV_LOG_WARNING, "AVFrame.format is not set\n");
+ if (frame && (frame->width == 0 || frame->height == 0))
+ av_log(avctx, AV_LOG_WARNING, "AVFrame.width or height is not set\n");
+
+ av_assert0(avctx->codec->encode2);
+
+ ret = avctx->codec->encode2(avctx, avpkt, frame, got_packet_ptr);
+ av_assert0(ret <= 0);
+
+ /* The encoder wrote into the shared internal byte_buffer: copy the data
+ * into the user's buffer, or dup it, so avpkt never aliases it. */
+ if (avpkt->data && avpkt->data == avctx->internal->byte_buffer) {
+ needs_realloc = 0;
+ if (user_pkt.data) {
+ if (user_pkt.size >= avpkt->size) {
+ memcpy(user_pkt.data, avpkt->data, avpkt->size);
+ } else {
+ av_log(avctx, AV_LOG_ERROR, "Provided packet is too small, needs to be %d\n", avpkt->size);
+ avpkt->size = user_pkt.size;
+ ret = -1;
+ }
+ avpkt->buf = user_pkt.buf;
+ avpkt->data = user_pkt.data;
+ } else {
+ if (av_dup_packet(avpkt) < 0) {
+ ret = AVERROR(ENOMEM);
+ }
+ }
+ }
+
+ if (!ret) {
+ if (!*got_packet_ptr)
+ avpkt->size = 0;
+ else if (!(avctx->codec->capabilities & AV_CODEC_CAP_DELAY))
+ avpkt->pts = avpkt->dts = frame->pts;
+
+ if (needs_realloc && avpkt->data) {
+ /* Shrink/grow the allocation to the actual size plus padding. */
+ ret = av_buffer_realloc(&avpkt->buf, avpkt->size + AV_INPUT_BUFFER_PADDING_SIZE);
+ if (ret >= 0)
+ avpkt->data = avpkt->buf->data;
+ }
+
+ avctx->frame_number++;
+ }
+
+ if (ret < 0 || !*got_packet_ptr)
+ av_packet_unref(avpkt);
+
+ emms_c();
+ return ret;
+}
+
+int avcodec_encode_subtitle(AVCodecContext *avctx, uint8_t *buf, int buf_size,
+ const AVSubtitle *sub)
+{
+ int ret;
+ if (sub->start_display_time) {
+ av_log(avctx, AV_LOG_ERROR, "start_display_time must be 0.\n");
+ return -1;
+ }
+
+ ret = avctx->codec->encode_sub(avctx, buf, buf_size, sub);
+ avctx->frame_number++;
+ return ret;
+}
+
+/**
+ * Attempt to guess proper monotonic timestamps for decoded video frames
+ * which might have incorrect times. Input timestamps may wrap around, in
+ * which case the output will as well.
+ *
+ * @param pts the pts field of the decoded AVPacket, as passed through
+ * AVFrame.pkt_pts
+ * @param dts the dts field of the decoded AVPacket
+ * @return one of the input values, may be AV_NOPTS_VALUE
+ */
+static int64_t guess_correct_pts(AVCodecContext *ctx,
+ int64_t reordered_pts, int64_t dts)
+{
+ int64_t pts = AV_NOPTS_VALUE;
+
+ if (dts != AV_NOPTS_VALUE) {
+ ctx->pts_correction_num_faulty_dts += dts <= ctx->pts_correction_last_dts;
+ ctx->pts_correction_last_dts = dts;
+ } else if (reordered_pts != AV_NOPTS_VALUE)
+ ctx->pts_correction_last_dts = reordered_pts;
+
+ if (reordered_pts != AV_NOPTS_VALUE) {
+ ctx->pts_correction_num_faulty_pts += reordered_pts <= ctx->pts_correction_last_pts;
+ ctx->pts_correction_last_pts = reordered_pts;
+ } else if(dts != AV_NOPTS_VALUE)
+ ctx->pts_correction_last_pts = dts;
+
+ if ((ctx->pts_correction_num_faulty_pts<=ctx->pts_correction_num_faulty_dts || dts == AV_NOPTS_VALUE)
+ && reordered_pts != AV_NOPTS_VALUE)
+ pts = reordered_pts;
+ else
+ pts = dts;
+
+ return pts;
+}
+
/*
 * Apply AV_PKT_DATA_PARAM_CHANGE side data from a packet to the codec
 * context (channel count/layout, sample rate, dimensions).
 *
 * The side data layout is: LE32 flags, then for each set flag the
 * corresponding LE32/LE64 payload, in the order tested below.
 *
 * Errors are best-effort: unless AV_EF_EXPLODE is set in err_recognition,
 * a malformed payload is logged and 0 is returned so decoding continues.
 */
static int apply_param_change(AVCodecContext *avctx, AVPacket *avpkt)
{
    int size = 0, ret;
    const uint8_t *data;
    uint32_t flags;
    int64_t val;

    data = av_packet_get_side_data(avpkt, AV_PKT_DATA_PARAM_CHANGE, &size);
    if (!data)
        return 0;  /* no parameter change requested */

    if (!(avctx->codec->capabilities & AV_CODEC_CAP_PARAM_CHANGE)) {
        av_log(avctx, AV_LOG_ERROR, "This decoder does not support parameter "
               "changes, but PARAM_CHANGE side data was sent to it.\n");
        ret = AVERROR(EINVAL);
        goto fail2;
    }

    if (size < 4)
        goto fail;

    flags = bytestream_get_le32(&data);
    size -= 4;

    if (flags & AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_COUNT) {
        if (size < 4)
            goto fail;
        /* read into int64_t so the full unsigned LE32 range can be
         * validated before assigning to the int field */
        val = bytestream_get_le32(&data);
        if (val <= 0 || val > INT_MAX) {
            av_log(avctx, AV_LOG_ERROR, "Invalid channel count");
            ret = AVERROR_INVALIDDATA;
            goto fail2;
        }
        avctx->channels = val;
        size -= 4;
    }
    if (flags & AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_LAYOUT) {
        if (size < 8)
            goto fail;
        avctx->channel_layout = bytestream_get_le64(&data);
        size -= 8;
    }
    if (flags & AV_SIDE_DATA_PARAM_CHANGE_SAMPLE_RATE) {
        if (size < 4)
            goto fail;
        val = bytestream_get_le32(&data);
        if (val <= 0 || val > INT_MAX) {
            av_log(avctx, AV_LOG_ERROR, "Invalid sample rate");
            ret = AVERROR_INVALIDDATA;
            goto fail2;
        }
        avctx->sample_rate = val;
        size -= 4;
    }
    if (flags & AV_SIDE_DATA_PARAM_CHANGE_DIMENSIONS) {
        if (size < 8)
            goto fail;
        avctx->width  = bytestream_get_le32(&data);
        avctx->height = bytestream_get_le32(&data);
        size -= 8;
        /* validates and re-applies the new dimensions */
        ret = ff_set_dimensions(avctx, avctx->width, avctx->height);
        if (ret < 0)
            goto fail2;
    }

    return 0;
fail:
    av_log(avctx, AV_LOG_ERROR, "PARAM_CHANGE side data too small.\n");
    ret = AVERROR_INVALIDDATA;
fail2:
    if (ret < 0) {
        av_log(avctx, AV_LOG_ERROR, "Error applying parameter changes.\n");
        if (avctx->err_recognition & AV_EF_EXPLODE)
            return ret;
    }
    return 0;
}
+
/*
 * Convert a refcounted decoder output frame into the legacy non-refcounted
 * form (for avctx->refcounted_frames == 0): the buffer references are
 * parked in avci->to_free (kept alive until the next decode call), while
 * the caller's frame keeps raw data/linesize pointers plus copied props.
 */
static int unrefcount_frame(AVCodecInternal *avci, AVFrame *frame)
{
    int ret;

    /* move the original frame to our backup */
    av_frame_unref(avci->to_free);
    av_frame_move_ref(avci->to_free, frame);

    /* now copy everything except the AVBufferRefs back
     * note that we make a COPY of the side data, so calling av_frame_free() on
     * the caller's frame will work properly */
    ret = av_frame_copy_props(frame, avci->to_free);
    if (ret < 0)
        return ret;

    memcpy(frame->data,     avci->to_free->data,     sizeof(frame->data));
    memcpy(frame->linesize, avci->to_free->linesize, sizeof(frame->linesize));
    if (avci->to_free->extended_data != avci->to_free->data) {
        /* audio with more channels than fit in data[]: duplicate the
         * extended_data pointer array so the caller owns its own copy */
        int planes = av_frame_get_channels(avci->to_free);
        int size   = planes * sizeof(*frame->extended_data);

        if (!size) {
            av_frame_unref(frame);
            return AVERROR_BUG;
        }

        frame->extended_data = av_malloc(size);
        if (!frame->extended_data) {
            av_frame_unref(frame);
            return AVERROR(ENOMEM);
        }
        memcpy(frame->extended_data, avci->to_free->extended_data,
               size);
    } else
        frame->extended_data = frame->data;

    /* restore the fields av_frame_copy_props() does not carry over */
    frame->format         = avci->to_free->format;
    frame->width          = avci->to_free->width;
    frame->height         = avci->to_free->height;
    frame->channel_layout = avci->to_free->channel_layout;
    frame->nb_samples     = avci->to_free->nb_samples;
    av_frame_set_channels(frame, av_frame_get_channels(avci->to_free));

    return 0;
}
+
/*
 * Legacy video decode entry point (pre send/receive API).
 * Decodes one packet into at most one picture; *got_picture_ptr reports
 * whether a frame was produced. Returns bytes consumed or a negative error.
 */
int attribute_align_arg avcodec_decode_video2(AVCodecContext *avctx, AVFrame *picture,
                                              int *got_picture_ptr,
                                              const AVPacket *avpkt)
{
    AVCodecInternal *avci = avctx->internal;
    int ret;
    // copy to ensure we do not change avpkt
    AVPacket tmp = *avpkt;

    if (!avctx->codec)
        return AVERROR(EINVAL);
    if (avctx->codec->type != AVMEDIA_TYPE_VIDEO) {
        av_log(avctx, AV_LOG_ERROR, "Invalid media type for video\n");
        return AVERROR(EINVAL);
    }

    if (!avctx->codec->decode) {
        av_log(avctx, AV_LOG_ERROR, "This decoder requires using the avcodec_send_packet() API.\n");
        return AVERROR(ENOSYS);
    }

    *got_picture_ptr = 0;
    if ((avctx->coded_width || avctx->coded_height) && av_image_check_size(avctx->coded_width, avctx->coded_height, 0, avctx))
        return AVERROR(EINVAL);

    avctx->internal->pkt = avpkt;
    ret = apply_param_change(avctx, avpkt);
    if (ret < 0)
        return ret;

    av_frame_unref(picture);

    /* empty packets only drain decoders with CAP_DELAY or frame threads */
    if ((avctx->codec->capabilities & AV_CODEC_CAP_DELAY) || avpkt->size ||
        (avctx->active_thread_type & FF_THREAD_FRAME)) {
        /* split merged side data off the payload so the decoder only sees
         * raw bitstream; undone again in the did_split cleanup below */
        int did_split = av_packet_split_side_data(&tmp);
        ret = apply_param_change(avctx, &tmp);
        if (ret < 0)
            goto fail;

        avctx->internal->pkt = &tmp;
        if (HAVE_THREADS && avctx->active_thread_type & FF_THREAD_FRAME)
            ret = ff_thread_decode_frame(avctx, picture, got_picture_ptr,
                                         &tmp);
        else {
            ret = avctx->codec->decode(avctx, picture, got_picture_ptr,
                                       &tmp);
            if (!(avctx->codec->caps_internal & FF_CODEC_CAP_SETS_PKT_DTS))
                picture->pkt_dts = avpkt->dts;

            if(!avctx->has_b_frames){
                av_frame_set_pkt_pos(picture, avpkt->pos);
            }
            //FIXME these should be under if(!avctx->has_b_frames)
            /* get_buffer is supposed to set frame parameters */
            if (!(avctx->codec->capabilities & AV_CODEC_CAP_DR1)) {
                if (!picture->sample_aspect_ratio.num)    picture->sample_aspect_ratio = avctx->sample_aspect_ratio;
                if (!picture->width)                      picture->width               = avctx->width;
                if (!picture->height)                     picture->height              = avctx->height;
                if (picture->format == AV_PIX_FMT_NONE)   picture->format              = avctx->pix_fmt;
            }
        }

fail:
        emms_c(); //needed to avoid an emms_c() call before every return;

        avctx->internal->pkt = NULL;
        if (did_split) {
            av_packet_free_side_data(&tmp);
            /* decoder consumed the whole split payload: report the whole
             * original (merged) packet as consumed */
            if(ret == tmp.size)
                ret = avpkt->size;
        }

        if (*got_picture_ptr) {
            if (!avctx->refcounted_frames) {
                int err = unrefcount_frame(avci, picture);
                if (err < 0)
                    return err;
            }

            avctx->frame_number++;
            av_frame_set_best_effort_timestamp(picture,
                                               guess_correct_pts(avctx,
                                                                 picture->pkt_pts,
                                                                 picture->pkt_dts));
        } else
            av_frame_unref(picture);
    } else
        ret = 0;

    /* many decoders assign whole AVFrames, thus overwriting extended_data;
     * make sure it's set correctly */
    av_assert0(!picture->extended_data || picture->extended_data == picture->data);

#if FF_API_AVCTX_TIMEBASE
    if (avctx->framerate.num > 0 && avctx->framerate.den > 0)
        avctx->time_base = av_inv_q(av_mul_q(avctx->framerate, (AVRational){avctx->ticks_per_frame, 1}));
#endif

    return ret;
}
+
/*
 * Legacy audio decode entry point (pre send/receive API).
 * Decodes one packet into at most one frame, then applies
 * AV_PKT_DATA_SKIP_SAMPLES side data (leading skip + trailing discard)
 * unless the user requested manual handling via AV_CODEC_FLAG2_SKIP_MANUAL.
 * Returns bytes consumed or a negative error.
 */
int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx,
                                              AVFrame *frame,
                                              int *got_frame_ptr,
                                              const AVPacket *avpkt)
{
    AVCodecInternal *avci = avctx->internal;
    int ret = 0;

    *got_frame_ptr = 0;

    if (!avctx->codec)
        return AVERROR(EINVAL);

    if (!avctx->codec->decode) {
        av_log(avctx, AV_LOG_ERROR, "This decoder requires using the avcodec_send_packet() API.\n");
        return AVERROR(ENOSYS);
    }

    if (!avpkt->data && avpkt->size) {
        av_log(avctx, AV_LOG_ERROR, "invalid packet: NULL data, size != 0\n");
        return AVERROR(EINVAL);
    }
    if (avctx->codec->type != AVMEDIA_TYPE_AUDIO) {
        av_log(avctx, AV_LOG_ERROR, "Invalid media type for audio\n");
        return AVERROR(EINVAL);
    }

    av_frame_unref(frame);

    /* empty packets only drain decoders with CAP_DELAY or frame threads */
    if ((avctx->codec->capabilities & AV_CODEC_CAP_DELAY) || avpkt->size || (avctx->active_thread_type & FF_THREAD_FRAME)) {
        uint8_t *side;
        int side_size;
        uint32_t discard_padding = 0;
        uint8_t skip_reason = 0;
        uint8_t discard_reason = 0;
        // copy to ensure we do not change avpkt
        AVPacket tmp = *avpkt;
        int did_split = av_packet_split_side_data(&tmp);
        ret = apply_param_change(avctx, &tmp);
        if (ret < 0)
            goto fail;

        avctx->internal->pkt = &tmp;
        if (HAVE_THREADS && avctx->active_thread_type & FF_THREAD_FRAME)
            ret = ff_thread_decode_frame(avctx, frame, got_frame_ptr, &tmp);
        else {
            ret = avctx->codec->decode(avctx, frame, got_frame_ptr, &tmp);
            av_assert0(ret <= tmp.size);
            frame->pkt_dts = avpkt->dts;
        }
        if (ret >= 0 && *got_frame_ptr) {
            avctx->frame_number++;
            av_frame_set_best_effort_timestamp(frame,
                                               guess_correct_pts(avctx,
                                                                 frame->pkt_pts,
                                                                 frame->pkt_dts));
            /* fill in parameters a decoder may have left unset */
            if (frame->format == AV_SAMPLE_FMT_NONE)
                frame->format = avctx->sample_fmt;
            if (!frame->channel_layout)
                frame->channel_layout = avctx->channel_layout;
            if (!av_frame_get_channels(frame))
                av_frame_set_channels(frame, avctx->channels);
            if (!frame->sample_rate)
                frame->sample_rate = avctx->sample_rate;
        }

        /* side data layout: LE32 skip count, LE32 discard count,
         * u8 skip reason, u8 discard reason — hence the >= 10 check */
        side= av_packet_get_side_data(avctx->internal->pkt, AV_PKT_DATA_SKIP_SAMPLES, &side_size);
        if(side && side_size>=10) {
            avctx->internal->skip_samples = AV_RL32(side);
            discard_padding = AV_RL32(side + 4);
            av_log(avctx, AV_LOG_DEBUG, "skip %d / discard %d samples due to side data\n",
                   avctx->internal->skip_samples, (int)discard_padding);
            skip_reason = AV_RL8(side + 8);
            discard_reason = AV_RL8(side + 9);
        }
        if (avctx->internal->skip_samples > 0 && *got_frame_ptr &&
            !(avctx->flags2 & AV_CODEC_FLAG2_SKIP_MANUAL)) {
            if(frame->nb_samples <= avctx->internal->skip_samples){
                /* whole frame skipped; remainder carries over to the next one */
                *got_frame_ptr = 0;
                avctx->internal->skip_samples -= frame->nb_samples;
                av_log(avctx, AV_LOG_DEBUG, "skip whole frame, skip left: %d\n",
                       avctx->internal->skip_samples);
            } else {
                /* drop the leading samples in place and advance timestamps
                 * by the skipped duration */
                av_samples_copy(frame->extended_data, frame->extended_data, 0, avctx->internal->skip_samples,
                                frame->nb_samples - avctx->internal->skip_samples, avctx->channels, frame->format);
                if(avctx->pkt_timebase.num && avctx->sample_rate) {
                    int64_t diff_ts = av_rescale_q(avctx->internal->skip_samples,
                                                   (AVRational){1, avctx->sample_rate},
                                                   avctx->pkt_timebase);
                    if(frame->pkt_pts!=AV_NOPTS_VALUE)
                        frame->pkt_pts += diff_ts;
                    if(frame->pkt_dts!=AV_NOPTS_VALUE)
                        frame->pkt_dts += diff_ts;
                    if (av_frame_get_pkt_duration(frame) >= diff_ts)
                        av_frame_set_pkt_duration(frame, av_frame_get_pkt_duration(frame) - diff_ts);
                } else {
                    av_log(avctx, AV_LOG_WARNING, "Could not update timestamps for skipped samples.\n");
                }
                av_log(avctx, AV_LOG_DEBUG, "skip %d/%d samples\n",
                       avctx->internal->skip_samples, frame->nb_samples);
                frame->nb_samples -= avctx->internal->skip_samples;
                avctx->internal->skip_samples = 0;
            }
        }

        /* trailing padding: shorten (or drop) the frame from the end */
        if (discard_padding > 0 && discard_padding <= frame->nb_samples && *got_frame_ptr &&
            !(avctx->flags2 & AV_CODEC_FLAG2_SKIP_MANUAL)) {
            if (discard_padding == frame->nb_samples) {
                *got_frame_ptr = 0;
            } else {
                if(avctx->pkt_timebase.num && avctx->sample_rate) {
                    int64_t diff_ts = av_rescale_q(frame->nb_samples - discard_padding,
                                                   (AVRational){1, avctx->sample_rate},
                                                   avctx->pkt_timebase);
                    av_frame_set_pkt_duration(frame, diff_ts);
                } else {
                    av_log(avctx, AV_LOG_WARNING, "Could not update timestamps for discarded samples.\n");
                }
                av_log(avctx, AV_LOG_DEBUG, "discard %d/%d samples\n",
                       (int)discard_padding, frame->nb_samples);
                frame->nb_samples -= discard_padding;
            }
        }

        /* manual mode: forward the skip info as frame side data instead
         * of applying it here */
        if ((avctx->flags2 & AV_CODEC_FLAG2_SKIP_MANUAL) && *got_frame_ptr) {
            AVFrameSideData *fside = av_frame_new_side_data(frame, AV_FRAME_DATA_SKIP_SAMPLES, 10);
            if (fside) {
                AV_WL32(fside->data, avctx->internal->skip_samples);
                AV_WL32(fside->data + 4, discard_padding);
                AV_WL8(fside->data + 8, skip_reason);
                AV_WL8(fside->data + 9, discard_reason);
                avctx->internal->skip_samples = 0;
            }
        }
fail:
        avctx->internal->pkt = NULL;
        if (did_split) {
            av_packet_free_side_data(&tmp);
            /* whole split payload consumed => report the full merged size */
            if(ret == tmp.size)
                ret = avpkt->size;
        }

        if (ret >= 0 && *got_frame_ptr) {
            if (!avctx->refcounted_frames) {
                int err = unrefcount_frame(avci, frame);
                if (err < 0)
                    return err;
            }
        } else
            av_frame_unref(frame);
    }

    av_assert0(ret <= avpkt->size);

    return ret;
}
+
#define UTF8_MAX_BYTES 4 /* 5 and 6 bytes sequences should not be used */
/*
 * Recode a subtitle packet from avctx->sub_charenc to UTF-8 (iconv).
 * No-op (returns 0, outpkt untouched) unless the charenc mode is
 * PRE_DECODER and the input is non-empty. On success outpkt owns a new
 * NUL-padded UTF-8 buffer.
 *
 * NOTE(review): on iconv failure outpkt->data/size still point at the
 * buffer freed by av_packet_unref(&tmp); the caller currently never reads
 * outpkt when ret < 0, but this is fragile — verify before reuse.
 */
static int recode_subtitle(AVCodecContext *avctx,
                           AVPacket *outpkt, const AVPacket *inpkt)
{
#if CONFIG_ICONV
    iconv_t cd = (iconv_t)-1;
    int ret = 0;
    char *inb, *outb;
    size_t inl, outl;
    AVPacket tmp;
#endif

    if (avctx->sub_charenc_mode != FF_SUB_CHARENC_MODE_PRE_DECODER || inpkt->size == 0)
        return 0;

#if CONFIG_ICONV
    /* the charenc was validated at open time, so this cannot fail */
    cd = iconv_open("UTF-8", avctx->sub_charenc);
    av_assert0(cd != (iconv_t)-1);

    inb = inpkt->data;
    inl = inpkt->size;

    /* worst case: every input byte becomes UTF8_MAX_BYTES output bytes */
    if (inl >= INT_MAX / UTF8_MAX_BYTES - AV_INPUT_BUFFER_PADDING_SIZE) {
        av_log(avctx, AV_LOG_ERROR, "Subtitles packet is too big for recoding\n");
        ret = AVERROR(ENOMEM);
        goto end;
    }

    ret = av_new_packet(&tmp, inl * UTF8_MAX_BYTES);
    if (ret < 0)
        goto end;
    outpkt->buf  = tmp.buf;
    outpkt->data = tmp.data;
    outpkt->size = tmp.size;
    outb = outpkt->data;
    outl = outpkt->size;

    /* first call converts, second flushes shift state; any leftover input
     * or a full output buffer means the conversion failed */
    if (iconv(cd, &inb, &inl, &outb, &outl) == (size_t)-1 ||
        iconv(cd, NULL, NULL, &outb, &outl) == (size_t)-1 ||
        outl >= outpkt->size || inl != 0) {
        ret = FFMIN(AVERROR(errno), -1);
        av_log(avctx, AV_LOG_ERROR, "Unable to recode subtitle event \"%s\" "
               "from %s to UTF-8\n", inpkt->data, avctx->sub_charenc);
        av_packet_unref(&tmp);
        goto end;
    }
    /* trim to used size; zero the tail so the text stays NUL-terminated */
    outpkt->size -= outl;
    memset(outpkt->data + outpkt->size, 0, outl);

end:
    if (cd != (iconv_t)-1)
        iconv_close(cd);
    return ret;
#else
    av_log(avctx, AV_LOG_ERROR, "requesting subtitles recoding without iconv");
    return AVERROR(EINVAL);
#endif
}
+
+static int utf8_check(const uint8_t *str)
+{
+ const uint8_t *byte;
+ uint32_t codepoint, min;
+
+ while (*str) {
+ byte = str;
+ GET_UTF8(codepoint, *(byte++), return 0;);
+ min = byte - str == 1 ? 0 : byte - str == 2 ? 0x80 :
+ 1 << (5 * (byte - str) - 4);
+ if (codepoint < min || codepoint >= 0x110000 ||
+ codepoint == 0xFFFE /* BOM */ ||
+ codepoint >= 0xD800 && codepoint <= 0xDFFF /* surrogates */)
+ return 0;
+ str = byte;
+ }
+ return 1;
+}
+
+#if FF_API_ASS_TIMING
+static void insert_ts(AVBPrint *buf, int ts)
+{
+ if (ts == -1) {
+ av_bprintf(buf, "9:59:59.99,");
+ } else {
+ int h, m, s;
+
+ h = ts/360000; ts -= 360000*h;
+ m = ts/ 6000; ts -= 6000*m;
+ s = ts/ 100; ts -= 100*s;
+ av_bprintf(buf, "%d:%02d:%02d.%02d,", h, m, s, ts);
+ }
+}
+
/*
 * Rewrite each ASS rect from the modern event form into the old
 * standalone-file "Dialogue:" form with embedded start/end timestamps
 * derived from the packet's pts/duration (rescaled to centiseconds).
 * Rects already in "Dialogue:" form, or unparsable ones, are left as-is.
 */
static int convert_sub_to_old_ass_form(AVSubtitle *sub, const AVPacket *pkt, AVRational tb)
{
    int i;
    AVBPrint buf;

    av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);

    for (i = 0; i < sub->num_rects; i++) {
        char *final_dialog;
        const char *dialog;
        AVSubtitleRect *rect = sub->rects[i];
        int ts_start, ts_duration = -1;
        long int layer;

        /* skip non-ASS rects and those already in the old form */
        if (rect->type != SUBTITLE_ASS || !strncmp(rect->ass, "Dialogue: ", 10))
            continue;

        av_bprint_clear(&buf);

        /* skip ReadOrder */
        dialog = strchr(rect->ass, ',');
        if (!dialog)
            continue;
        dialog++;

        /* extract Layer or Marked */
        layer = strtol(dialog, (char**)&dialog, 10);
        if (*dialog != ',')
            continue;
        dialog++;

        /* rescale timing to ASS time base (ms) */
        ts_start = av_rescale_q(pkt->pts, tb, av_make_q(1, 100));
        if (pkt->duration != -1)
            ts_duration = av_rescale_q(pkt->duration, tb, av_make_q(1, 100));
        sub->end_display_time = FFMAX(sub->end_display_time, 10 * ts_duration);

        /* construct ASS (standalone file form with timestamps) string */
        av_bprintf(&buf, "Dialogue: %ld,", layer);
        insert_ts(&buf, ts_start);
        insert_ts(&buf, ts_duration == -1 ? -1 : ts_start + ts_duration);
        av_bprintf(&buf, "%s\r\n", dialog);

        /* replace the rect's ass string with the rebuilt line */
        final_dialog = av_strdup(buf.str);
        if (!av_bprint_is_complete(&buf) || !final_dialog) {
            av_freep(&final_dialog);
            av_bprint_finalize(&buf, NULL);
            return AVERROR(ENOMEM);
        }
        av_freep(&rect->ass);
        rect->ass = final_dialog;
    }

    av_bprint_finalize(&buf, NULL);
    return 0;
}
+#endif
+
/*
 * Decode one subtitle packet. Handles side-data splitting, optional
 * charset recoding to UTF-8, legacy ASS-timing conversion, and UTF-8
 * validation of the decoded text. Returns bytes consumed or an error.
 */
int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
                             int *got_sub_ptr,
                             AVPacket *avpkt)
{
    int i, ret = 0;

    if (!avpkt->data && avpkt->size) {
        av_log(avctx, AV_LOG_ERROR, "invalid packet: NULL data, size != 0\n");
        return AVERROR(EINVAL);
    }
    if (!avctx->codec)
        return AVERROR(EINVAL);
    if (avctx->codec->type != AVMEDIA_TYPE_SUBTITLE) {
        av_log(avctx, AV_LOG_ERROR, "Invalid media type for subtitles\n");
        return AVERROR(EINVAL);
    }

    *got_sub_ptr = 0;
    get_subtitle_defaults(sub);

    if ((avctx->codec->capabilities & AV_CODEC_CAP_DELAY) || avpkt->size) {
        AVPacket pkt_recoded;
        AVPacket tmp = *avpkt;
        int did_split = av_packet_split_side_data(&tmp);
        //apply_param_change(avctx, &tmp);

        if (did_split) {
            /* FFMIN() prevents overflow in case the packet wasn't allocated with
             * proper padding.
             * If the side data is smaller than the buffer padding size, the
             * remaining bytes should have already been filled with zeros by the
             * original packet allocation anyway. */
            memset(tmp.data + tmp.size, 0,
                   FFMIN(avpkt->size - tmp.size, AV_INPUT_BUFFER_PADDING_SIZE));
        }

        pkt_recoded = tmp;
        ret = recode_subtitle(avctx, &pkt_recoded, &tmp);
        if (ret < 0) {
            *got_sub_ptr = 0;
        } else {
            avctx->internal->pkt = &pkt_recoded;

            if (avctx->pkt_timebase.num && avpkt->pts != AV_NOPTS_VALUE)
                sub->pts = av_rescale_q(avpkt->pts,
                                        avctx->pkt_timebase, AV_TIME_BASE_Q);
            ret = avctx->codec->decode(avctx, sub, got_sub_ptr, &pkt_recoded);
            /* invariant: got_sub implies success, rects imply got_sub */
            av_assert1((ret >= 0) >= !!*got_sub_ptr &&
                       !!*got_sub_ptr >= !!sub->num_rects);

#if FF_API_ASS_TIMING
            if (avctx->sub_text_format == FF_SUB_TEXT_FMT_ASS_WITH_TIMINGS
                && *got_sub_ptr && sub->num_rects) {
                const AVRational tb = avctx->pkt_timebase.num ? avctx->pkt_timebase
                                                              : avctx->time_base;
                int err = convert_sub_to_old_ass_form(sub, avpkt, tb);
                if (err < 0)
                    ret = err;
            }
#endif

            /* derive a display duration from the packet when the decoder
             * did not provide one */
            if (sub->num_rects && !sub->end_display_time && avpkt->duration &&
                avctx->pkt_timebase.num) {
                AVRational ms = { 1, 1000 };
                sub->end_display_time = av_rescale_q(avpkt->duration,
                                                     avctx->pkt_timebase, ms);
            }

            for (i = 0; i < sub->num_rects; i++) {
                if (sub->rects[i]->ass && !utf8_check(sub->rects[i]->ass)) {
                    av_log(avctx, AV_LOG_ERROR,
                           "Invalid UTF-8 in decoded subtitles text; "
                           "maybe missing -sub_charenc option\n");
                    avsubtitle_free(sub);
                    return AVERROR_INVALIDDATA;
                }
            }

            if (tmp.data != pkt_recoded.data) { // did we recode?
                /* prevent from destroying side data from original packet */
                pkt_recoded.side_data = NULL;
                pkt_recoded.side_data_elems = 0;

                av_packet_unref(&pkt_recoded);
            }
            if (avctx->codec_descriptor->props & AV_CODEC_PROP_BITMAP_SUB)
                sub->format = 0;
            else if (avctx->codec_descriptor->props & AV_CODEC_PROP_TEXT_SUB)
                sub->format = 1;
            avctx->internal->pkt = NULL;
        }

        if (did_split) {
            av_packet_free_side_data(&tmp);
            /* whole split payload consumed => report the full merged size */
            if(ret == tmp.size)
                ret = avpkt->size;
        }

        if (*got_sub_ptr)
            avctx->frame_number++;
    }

    return ret;
}
+
+void avsubtitle_free(AVSubtitle *sub)
+{
+ int i;
+
+ for (i = 0; i < sub->num_rects; i++) {
+ av_freep(&sub->rects[i]->data[0]);
+ av_freep(&sub->rects[i]->data[1]);
+ av_freep(&sub->rects[i]->data[2]);
+ av_freep(&sub->rects[i]->data[3]);
+ av_freep(&sub->rects[i]->text);
+ av_freep(&sub->rects[i]->ass);
+ av_freep(&sub->rects[i]);
+ }
+
+ av_freep(&sub->rects);
+
+ memset(sub, 0, sizeof(AVSubtitle));
+}
+
/*
 * send/receive emulation helper: run one legacy decode call, stash any
 * produced frame in internal->buffer_frame, and keep unconsumed packet
 * bytes in internal->buffer_pkt for the next call.
 * Returns 0 on success (even without a frame), or a negative error.
 */
static int do_decode(AVCodecContext *avctx, AVPacket *pkt)
{
    int got_frame;
    int ret;

    av_assert0(!avctx->internal->buffer_frame->buf[0]);

    if (!pkt)
        pkt = avctx->internal->buffer_pkt;

    // This is the lesser evil. The field is for compatibility with legacy users
    // of the legacy API, and users using the new API should not be forced to
    // even know about this field.
    avctx->refcounted_frames = 1;

    // Some codecs (at least wma lossless) will crash when feeding drain packets
    // after EOF was signaled.
    if (avctx->internal->draining_done)
        return AVERROR_EOF;

    if (avctx->codec_type == AVMEDIA_TYPE_VIDEO) {
        ret = avcodec_decode_video2(avctx, avctx->internal->buffer_frame,
                                    &got_frame, pkt);
        /* video decoders always consume the whole packet on success */
        if (ret >= 0)
            ret = pkt->size;
    } else if (avctx->codec_type == AVMEDIA_TYPE_AUDIO) {
        ret = avcodec_decode_audio4(avctx, avctx->internal->buffer_frame,
                                    &got_frame, pkt);
    } else {
        ret = AVERROR(EINVAL);
    }

    /* EAGAIN from the legacy API means "packet accepted, no frame yet" */
    if (ret == AVERROR(EAGAIN))
        ret = pkt->size;

    if (ret < 0)
        return ret;

    if (avctx->internal->draining && !got_frame)
        avctx->internal->draining_done = 1;

    if (ret >= pkt->size) {
        av_packet_unref(avctx->internal->buffer_pkt);
    } else {
        /* partial consumption: keep the remainder buffered, referenced so
         * it survives until the next call; timestamps apply only once */
        int consumed = ret;

        if (pkt != avctx->internal->buffer_pkt) {
            av_packet_unref(avctx->internal->buffer_pkt);
            if ((ret = av_packet_ref(avctx->internal->buffer_pkt, pkt)) < 0)
                return ret;
        }

        avctx->internal->buffer_pkt->data += consumed;
        avctx->internal->buffer_pkt->size -= consumed;
        avctx->internal->buffer_pkt->pts   = AV_NOPTS_VALUE;
        avctx->internal->buffer_pkt->dts   = AV_NOPTS_VALUE;
    }

    if (got_frame)
        av_assert0(avctx->internal->buffer_frame->buf[0]);

    return 0;
}
+
/*
 * New-API packet input. NULL / empty packet enters draining mode.
 * Dispatches to the codec's native send_packet when available, otherwise
 * emulates on top of the legacy decode calls via do_decode().
 */
int attribute_align_arg avcodec_send_packet(AVCodecContext *avctx, const AVPacket *avpkt)
{
    int ret;

    if (!avcodec_is_open(avctx) || !av_codec_is_decoder(avctx->codec))
        return AVERROR(EINVAL);

    if (avctx->internal->draining)
        return AVERROR_EOF;

    if (!avpkt || !avpkt->size) {
        avctx->internal->draining = 1;
        avpkt = NULL;

        /* no delayed frames to drain: nothing more to do */
        if (!(avctx->codec->capabilities & AV_CODEC_CAP_DELAY))
            return 0;
    }

    if (avctx->codec->send_packet) {
        if (avpkt) {
            AVPacket tmp = *avpkt;
            int did_split = av_packet_split_side_data(&tmp);
            ret = apply_param_change(avctx, &tmp);
            if (ret >= 0)
                ret = avctx->codec->send_packet(avctx, &tmp);
            if (did_split)
                av_packet_free_side_data(&tmp);
            return ret;
        } else {
            return avctx->codec->send_packet(avctx, NULL);
        }
    }

    // Emulation via old API. Assume avpkt is likely not refcounted, while
    // decoder output is always refcounted, and avoid copying.

    /* previous input not fully consumed yet */
    if (avctx->internal->buffer_pkt->size || avctx->internal->buffer_frame->buf[0])
        return AVERROR(EAGAIN);

    // The goal is decoding the first frame of the packet without using memcpy,
    // because the common case is having only 1 frame per packet (especially
    // with video, but audio too). In other cases, it can't be avoided, unless
    // the user is feeding refcounted packets.
    return do_decode(avctx, (AVPacket *)avpkt);
}
+
/*
 * New-API frame output. Returns a buffered/decoded frame, EAGAIN when
 * more input is needed, or AVERROR_EOF once draining is exhausted.
 * Uses the codec's native receive_frame when available, otherwise the
 * do_decode() emulation over the legacy API.
 */
int attribute_align_arg avcodec_receive_frame(AVCodecContext *avctx, AVFrame *frame)
{
    int ret;

    av_frame_unref(frame);

    if (!avcodec_is_open(avctx) || !av_codec_is_decoder(avctx->codec))
        return AVERROR(EINVAL);

    if (avctx->codec->receive_frame) {
        /* codecs without CAP_DELAY have nothing buffered while draining */
        if (avctx->internal->draining && !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY))
            return AVERROR_EOF;
        return avctx->codec->receive_frame(avctx, frame);
    }

    // Emulation via old API.

    if (!avctx->internal->buffer_frame->buf[0]) {
        if (!avctx->internal->buffer_pkt->size && !avctx->internal->draining)
            return AVERROR(EAGAIN);

        while (1) {
            if ((ret = do_decode(avctx, avctx->internal->buffer_pkt)) < 0) {
                av_packet_unref(avctx->internal->buffer_pkt);
                return ret;
            }
            // Some audio decoders may consume partial data without returning
            // a frame (fate-wmapro-2ch). There is no way to make the caller
            // call avcodec_receive_frame() again without returning a frame,
            // so try to decode more in these cases.
            if (avctx->internal->buffer_frame->buf[0] ||
                !avctx->internal->buffer_pkt->size)
                break;
        }
    }

    if (!avctx->internal->buffer_frame->buf[0])
        return avctx->internal->draining ? AVERROR_EOF : AVERROR(EAGAIN);

    av_frame_move_ref(frame, avctx->internal->buffer_frame);
    return 0;
}
+
/*
 * send/receive emulation helper for encoders: run one legacy encode call
 * and stash the produced packet in internal->buffer_pkt, marking it valid
 * via buffer_pkt_valid for avcodec_receive_packet() to pick up.
 */
static int do_encode(AVCodecContext *avctx, const AVFrame *frame, int *got_packet)
{
    int ret;
    *got_packet = 0;

    av_packet_unref(avctx->internal->buffer_pkt);
    avctx->internal->buffer_pkt_valid = 0;

    if (avctx->codec_type == AVMEDIA_TYPE_VIDEO) {
        ret = avcodec_encode_video2(avctx, avctx->internal->buffer_pkt,
                                    frame, got_packet);
    } else if (avctx->codec_type == AVMEDIA_TYPE_AUDIO) {
        ret = avcodec_encode_audio2(avctx, avctx->internal->buffer_pkt,
                                    frame, got_packet);
    } else {
        ret = AVERROR(EINVAL);
    }

    if (ret >= 0 && *got_packet) {
        // Encoders must always return ref-counted buffers.
        // Side-data only packets have no data and can be not ref-counted.
        av_assert0(!avctx->internal->buffer_pkt->data || avctx->internal->buffer_pkt->buf);
        avctx->internal->buffer_pkt_valid = 1;
        ret = 0;
    } else {
        av_packet_unref(avctx->internal->buffer_pkt);
    }

    return ret;
}
+
/*
 * New-API frame input for encoders. NULL frame enters draining mode.
 * Uses the codec's native send_frame when available, otherwise encodes
 * immediately via do_encode() and buffers the resulting packet.
 */
int attribute_align_arg avcodec_send_frame(AVCodecContext *avctx, const AVFrame *frame)
{
    if (!avcodec_is_open(avctx) || !av_codec_is_encoder(avctx->codec))
        return AVERROR(EINVAL);

    if (avctx->internal->draining)
        return AVERROR_EOF;

    if (!frame) {
        avctx->internal->draining = 1;

        /* nothing buffered in a no-delay encoder: drain is a no-op */
        if (!(avctx->codec->capabilities & AV_CODEC_CAP_DELAY))
            return 0;
    }

    if (avctx->codec->send_frame)
        return avctx->codec->send_frame(avctx, frame);

    // Emulation via old API. Do it here instead of avcodec_receive_packet, because:
    // 1. if the AVFrame is not refcounted, the copying will be much more
    //    expensive than copying the packet data
    // 2. assume few users use non-refcounted AVPackets, so usually no copy is
    //    needed

    if (avctx->internal->buffer_pkt_valid)
        return AVERROR(EAGAIN);

    /* got_packet is intentionally discarded: the result is retrieved later
     * through buffer_pkt_valid in avcodec_receive_packet() */
    return do_encode(avctx, frame, &(int){0});
}
+
/*
 * New-API packet output for encoders. Returns the buffered packet,
 * EAGAIN when more frames are needed, or AVERROR_EOF once draining
 * produced no further output.
 */
int attribute_align_arg avcodec_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
{
    av_packet_unref(avpkt);

    if (!avcodec_is_open(avctx) || !av_codec_is_encoder(avctx->codec))
        return AVERROR(EINVAL);

    if (avctx->codec->receive_packet) {
        if (avctx->internal->draining && !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY))
            return AVERROR_EOF;
        return avctx->codec->receive_packet(avctx, avpkt);
    }

    // Emulation via old API.

    if (!avctx->internal->buffer_pkt_valid) {
        int got_packet;
        int ret;
        /* frames are encoded eagerly in avcodec_send_frame(); only a drain
         * call (NULL frame) can still produce output here */
        if (!avctx->internal->draining)
            return AVERROR(EAGAIN);
        ret = do_encode(avctx, NULL, &got_packet);
        if (ret < 0)
            return ret;
        if (ret >= 0 && !got_packet)
            return AVERROR_EOF;
    }

    av_packet_move_ref(avpkt, avctx->internal->buffer_pkt);
    avctx->internal->buffer_pkt_valid = 0;
    return 0;
}
+
/*
 * Close a codec context and free everything it owns. Safe to call on a
 * NULL or never-opened context. Teardown order matters: threads are
 * stopped before the codec's close(), internal buffers before the
 * internal struct itself.
 */
av_cold int avcodec_close(AVCodecContext *avctx)
{
    int i;

    if (!avctx)
        return 0;

    if (avcodec_is_open(avctx)) {
        FramePool *pool = avctx->internal->pool;
        if (CONFIG_FRAME_THREAD_ENCODER &&
            avctx->internal->frame_thread_encoder && avctx->thread_count > 1) {
            ff_frame_thread_encoder_free(avctx);
        }
        if (HAVE_THREADS && avctx->internal->thread_ctx)
            ff_thread_free(avctx);
        if (avctx->codec && avctx->codec->close)
            avctx->codec->close(avctx);
        avctx->internal->byte_buffer_size = 0;
        av_freep(&avctx->internal->byte_buffer);
        av_frame_free(&avctx->internal->to_free);
        av_frame_free(&avctx->internal->buffer_frame);
        av_packet_free(&avctx->internal->buffer_pkt);
        for (i = 0; i < FF_ARRAY_ELEMS(pool->pools); i++)
            av_buffer_pool_uninit(&pool->pools[i]);
        av_freep(&avctx->internal->pool);

        if (avctx->hwaccel && avctx->hwaccel->uninit)
            avctx->hwaccel->uninit(avctx);
        av_freep(&avctx->internal->hwaccel_priv_data);

        av_freep(&avctx->internal);
    }

    for (i = 0; i < avctx->nb_coded_side_data; i++)
        av_freep(&avctx->coded_side_data[i].data);
    av_freep(&avctx->coded_side_data);
    avctx->nb_coded_side_data = 0;

    av_buffer_unref(&avctx->hw_frames_ctx);

    if (avctx->priv_data && avctx->codec && avctx->codec->priv_class)
        av_opt_free(avctx->priv_data);
    av_opt_free(avctx);
    av_freep(&avctx->priv_data);
    /* extradata is owned by the encoder user only on the decode side */
    if (av_codec_is_encoder(avctx->codec)) {
        av_freep(&avctx->extradata);
#if FF_API_CODED_FRAME
FF_DISABLE_DEPRECATION_WARNINGS
        av_frame_free(&avctx->coded_frame);
FF_ENABLE_DEPRECATION_WARNINGS
#endif
    }
    avctx->codec = NULL;
    avctx->active_thread_type = 0;

    return 0;
}
+
+static enum AVCodecID remap_deprecated_codec_id(enum AVCodecID id)
+{
+ switch(id){
+ //This is for future deprecatec codec ids, its empty since
+ //last major bump but will fill up again over time, please don't remove it
+ default : return id;
+ }
+}
+
+static AVCodec *find_encdec(enum AVCodecID id, int encoder)
+{
+ AVCodec *p, *experimental = NULL;
+ p = first_avcodec;
+ id= remap_deprecated_codec_id(id);
+ while (p) {
+ if ((encoder ? av_codec_is_encoder(p) : av_codec_is_decoder(p)) &&
+ p->id == id) {
+ if (p->capabilities & AV_CODEC_CAP_EXPERIMENTAL && !experimental) {
+ experimental = p;
+ } else
+ return p;
+ }
+ p = p->next;
+ }
+ return experimental;
+}
+
/*
 * Find a registered encoder for the given codec ID.
 * Non-experimental encoders are preferred (see find_encdec()).
 */
AVCodec *avcodec_find_encoder(enum AVCodecID id)
{
    return find_encdec(id, 1);
}
+
+AVCodec *avcodec_find_encoder_by_name(const char *name)
+{
+ AVCodec *p;
+ if (!name)
+ return NULL;
+ p = first_avcodec;
+ while (p) {
+ if (av_codec_is_encoder(p) && strcmp(name, p->name) == 0)
+ return p;
+ p = p->next;
+ }
+ return NULL;
+}
+
/*
 * Find a registered decoder for the given codec ID.
 * Non-experimental decoders are preferred (see find_encdec()).
 */
AVCodec *avcodec_find_decoder(enum AVCodecID id)
{
    return find_encdec(id, 0);
}
+
+AVCodec *avcodec_find_decoder_by_name(const char *name)
+{
+ AVCodec *p;
+ if (!name)
+ return NULL;
+ p = first_avcodec;
+ while (p) {
+ if (av_codec_is_decoder(p) && strcmp(name, p->name) == 0)
+ return p;
+ p = p->next;
+ }
+ return NULL;
+}
+
/*
 * Return a human-readable name for a codec ID. Looks up the descriptor
 * table first, then falls back to any registered decoder/encoder name,
 * and finally to "unknown_codec". Never returns NULL.
 */
const char *avcodec_get_name(enum AVCodecID id)
{
    const AVCodecDescriptor *cd;
    AVCodec *codec;

    if (id == AV_CODEC_ID_NONE)
        return "none";
    cd = avcodec_descriptor_get(id);
    if (cd)
        return cd->name;
    /* reaching here means the descriptor table is out of sync */
    av_log(NULL, AV_LOG_WARNING, "Codec 0x%x is not in the full list.\n", id);
    codec = avcodec_find_decoder(id);
    if (codec)
        return codec->name;
    codec = avcodec_find_encoder(id);
    if (codec)
        return codec->name;
    return "unknown_codec";
}
+
/*
 * Render a 32-bit fourcc into buf, least-significant byte first.
 * Printable characters appear literally, others as "[N]" with N decimal.
 * Output is truncated to buf_size but the return value is the full length
 * that would have been written (snprintf semantics).
 */
size_t av_get_codec_tag_string(char *buf, size_t buf_size, unsigned int codec_tag)
{
    int i, written, total = 0;

#define TAG_PRINT(x)                                                      \
    (((x) >= '0' && (x) <= '9') ||                                        \
     ((x) >= 'a' && (x) <= 'z') || ((x) >= 'A' && (x) <= 'Z') ||          \
     ((x) == '.' || (x) == ' ' || (x) == '-' || (x) == '_'))

    for (i = 0; i < 4; i++) {
        unsigned int byte = codec_tag & 0xFF;

        written = snprintf(buf, buf_size,
                           TAG_PRINT(byte) ? "%c" : "[%d]", byte);
        buf       += written;
        buf_size   = buf_size > written ? buf_size - written : 0;
        total     += written;
        codec_tag >>= 8;
    }
    return total;
}
+
/**
 * Write a one-line, human-readable description of a codec context into buf,
 * e.g. "Video: h264 (High), yuv420p, 1920x1080 [...]". Used for stream dumps.
 * The amount of detail grows with the current av_log level.
 *
 * @param buf      destination; always NUL-terminated via snprintf
 * @param buf_size size of buf; nothing is written if <= 0
 * @param enc      context to describe (decoder or encoder side)
 * @param encode   non-zero when describing an encoder (adds q range, pass info)
 */
void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode)
{
    const char *codec_type;
    const char *codec_name;
    const char *profile = NULL;
    int64_t bitrate;
    int new_line = 0;
    AVRational display_aspect_ratio;
    const char *separator = enc->dump_separator ? (const char *)enc->dump_separator : ", ";

    if (!buf || buf_size <= 0)
        return;
    codec_type = av_get_media_type_string(enc->codec_type);
    codec_name = avcodec_get_name(enc->codec_id);
    profile = avcodec_profile_name(enc->codec_id, enc->profile);

    snprintf(buf, buf_size, "%s: %s", codec_type ? codec_type : "unknown",
             codec_name);
    buf[0] ^= 'a' ^ 'A'; /* first letter in uppercase */

    /* Append the implementation name when it differs from the generic one. */
    if (enc->codec && strcmp(enc->codec->name, codec_name))
        snprintf(buf + strlen(buf), buf_size - strlen(buf), " (%s)", enc->codec->name);

    if (profile)
        snprintf(buf + strlen(buf), buf_size - strlen(buf), " (%s)", profile);
    if (   enc->codec_type == AVMEDIA_TYPE_VIDEO
        && av_log_get_level() >= AV_LOG_VERBOSE
        && enc->refs)
        snprintf(buf + strlen(buf), buf_size - strlen(buf),
                 ", %d reference frame%s",
                 enc->refs, enc->refs > 1 ? "s" : "");

    if (enc->codec_tag) {
        char tag_buf[32];
        av_get_codec_tag_string(tag_buf, sizeof(tag_buf), enc->codec_tag);
        snprintf(buf + strlen(buf), buf_size - strlen(buf),
                 " (%s / 0x%04X)", tag_buf, enc->codec_tag);
    }

    switch (enc->codec_type) {
    case AVMEDIA_TYPE_VIDEO:
        {
        /* "detail" collects comma-separated pixel-format qualifiers and is
         * flushed as a single "(...)" group; the trailing ", " is stripped. */
        char detail[256] = "(";

        av_strlcat(buf, separator, buf_size);

        snprintf(buf + strlen(buf), buf_size - strlen(buf),
             "%s", enc->pix_fmt == AV_PIX_FMT_NONE ? "none" :
                 av_get_pix_fmt_name(enc->pix_fmt));
        if (enc->bits_per_raw_sample && enc->pix_fmt != AV_PIX_FMT_NONE &&
            enc->bits_per_raw_sample < av_pix_fmt_desc_get(enc->pix_fmt)->comp[0].depth)
            av_strlcatf(detail, sizeof(detail), "%d bpc, ", enc->bits_per_raw_sample);
        if (enc->color_range != AVCOL_RANGE_UNSPECIFIED)
            av_strlcatf(detail, sizeof(detail), "%s, ",
                        av_color_range_name(enc->color_range));

        if (enc->colorspace != AVCOL_SPC_UNSPECIFIED ||
            enc->color_primaries != AVCOL_PRI_UNSPECIFIED ||
            enc->color_trc != AVCOL_TRC_UNSPECIFIED) {
            /* Print the triple only when the three values disagree; a single
             * shared name is enough otherwise. */
            if (enc->colorspace != (int)enc->color_primaries ||
                enc->colorspace != (int)enc->color_trc) {
                new_line = 1;
                av_strlcatf(detail, sizeof(detail), "%s/%s/%s, ",
                            av_color_space_name(enc->colorspace),
                            av_color_primaries_name(enc->color_primaries),
                            av_color_transfer_name(enc->color_trc));
            } else
                av_strlcatf(detail, sizeof(detail), "%s, ",
                            av_get_colorspace_name(enc->colorspace));
        }

        if (av_log_get_level() >= AV_LOG_DEBUG &&
            enc->chroma_sample_location != AVCHROMA_LOC_UNSPECIFIED)
            av_strlcatf(detail, sizeof(detail), "%s, ",
                        av_chroma_location_name(enc->chroma_sample_location));

        if (strlen(detail) > 1) {
            detail[strlen(detail) - 2] = 0;
            av_strlcatf(buf, buf_size, "%s)", detail);
        }
        }

        if (enc->width) {
            av_strlcat(buf, new_line ? separator : ", ", buf_size);

            snprintf(buf + strlen(buf), buf_size - strlen(buf),
                     "%dx%d",
                     enc->width, enc->height);

            /* Show the coded size too when it differs from the display size. */
            if (av_log_get_level() >= AV_LOG_VERBOSE &&
                (enc->width != enc->coded_width ||
                 enc->height != enc->coded_height))
                snprintf(buf + strlen(buf), buf_size - strlen(buf),
                         " (%dx%d)", enc->coded_width, enc->coded_height);

            if (enc->sample_aspect_ratio.num) {
                av_reduce(&display_aspect_ratio.num, &display_aspect_ratio.den,
                          enc->width * (int64_t)enc->sample_aspect_ratio.num,
                          enc->height * (int64_t)enc->sample_aspect_ratio.den,
                          1024 * 1024);
                snprintf(buf + strlen(buf), buf_size - strlen(buf),
                         " [SAR %d:%d DAR %d:%d]",
                         enc->sample_aspect_ratio.num, enc->sample_aspect_ratio.den,
                         display_aspect_ratio.num, display_aspect_ratio.den);
            }
            if (av_log_get_level() >= AV_LOG_DEBUG) {
                int g = av_gcd(enc->time_base.num, enc->time_base.den);
                snprintf(buf + strlen(buf), buf_size - strlen(buf),
                         ", %d/%d",
                         enc->time_base.num / g, enc->time_base.den / g);
            }
        }
        if (encode) {
            snprintf(buf + strlen(buf), buf_size - strlen(buf),
                     ", q=%d-%d", enc->qmin, enc->qmax);
        } else {
            if (enc->properties & FF_CODEC_PROPERTY_CLOSED_CAPTIONS)
                snprintf(buf + strlen(buf), buf_size - strlen(buf),
                         ", Closed Captions");
            if (enc->properties & FF_CODEC_PROPERTY_LOSSLESS)
                snprintf(buf + strlen(buf), buf_size - strlen(buf),
                         ", lossless");
        }
        break;
    case AVMEDIA_TYPE_AUDIO:
        av_strlcat(buf, separator, buf_size);

        if (enc->sample_rate) {
            snprintf(buf + strlen(buf), buf_size - strlen(buf),
                     "%d Hz, ", enc->sample_rate);
        }
        av_get_channel_layout_string(buf + strlen(buf), buf_size - strlen(buf), enc->channels, enc->channel_layout);
        if (enc->sample_fmt != AV_SAMPLE_FMT_NONE) {
            snprintf(buf + strlen(buf), buf_size - strlen(buf),
                     ", %s", av_get_sample_fmt_name(enc->sample_fmt));
        }
        /* Mention the raw bit depth when it differs from the container size. */
        if (   enc->bits_per_raw_sample > 0
            && enc->bits_per_raw_sample != av_get_bytes_per_sample(enc->sample_fmt) * 8)
            snprintf(buf + strlen(buf), buf_size - strlen(buf),
                     " (%d bit)", enc->bits_per_raw_sample);
        break;
    case AVMEDIA_TYPE_DATA:
        if (av_log_get_level() >= AV_LOG_DEBUG) {
            int g = av_gcd(enc->time_base.num, enc->time_base.den);
            if (g)
                snprintf(buf + strlen(buf), buf_size - strlen(buf),
                         ", %d/%d",
                         enc->time_base.num / g, enc->time_base.den / g);
        }
        break;
    case AVMEDIA_TYPE_SUBTITLE:
        if (enc->width)
            snprintf(buf + strlen(buf), buf_size - strlen(buf),
                     ", %dx%d", enc->width, enc->height);
        break;
    default:
        return;
    }
    if (encode) {
        if (enc->flags & AV_CODEC_FLAG_PASS1)
            snprintf(buf + strlen(buf), buf_size - strlen(buf),
                     ", pass 1");
        if (enc->flags & AV_CODEC_FLAG_PASS2)
            snprintf(buf + strlen(buf), buf_size - strlen(buf),
                     ", pass 2");
    }
    bitrate = get_bit_rate(enc);
    if (bitrate != 0) {
        snprintf(buf + strlen(buf), buf_size - strlen(buf),
                 ", %"PRId64" kb/s", bitrate / 1000);
    } else if (enc->rc_max_rate > 0) {
        snprintf(buf + strlen(buf), buf_size - strlen(buf),
                 ", max. %"PRId64" kb/s", (int64_t)enc->rc_max_rate / 1000);
    }
}
+
+const char *av_get_profile_name(const AVCodec *codec, int profile)
+{
+ const AVProfile *p;
+ if (profile == FF_PROFILE_UNKNOWN || !codec->profiles)
+ return NULL;
+
+ for (p = codec->profiles; p->profile != FF_PROFILE_UNKNOWN; p++)
+ if (p->profile == profile)
+ return p->name;
+
+ return NULL;
+}
+
+const char *avcodec_profile_name(enum AVCodecID codec_id, int profile)
+{
+ const AVCodecDescriptor *desc = avcodec_descriptor_get(codec_id);
+ const AVProfile *p;
+
+ if (profile == FF_PROFILE_UNKNOWN || !desc || !desc->profiles)
+ return NULL;
+
+ for (p = desc->profiles; p->profile != FF_PROFILE_UNKNOWN; p++)
+ if (p->profile == profile)
+ return p->name;
+
+ return NULL;
+}
+
/* Return the run-time libavcodec version. The asserts pin historical
 * AVCodecID enum values as an ABI sanity check; the commented-out entries
 * record ids that were checked before earlier major bumps. */
unsigned avcodec_version(void)
{
//    av_assert0(AV_CODEC_ID_V410==164);
    av_assert0(AV_CODEC_ID_PCM_S8_PLANAR==65563);
    av_assert0(AV_CODEC_ID_ADPCM_G722==69660);
//    av_assert0(AV_CODEC_ID_BMV_AUDIO==86071);
    av_assert0(AV_CODEC_ID_SRT==94216);
    av_assert0(LIBAVCODEC_VERSION_MICRO >= 100);

    return LIBAVCODEC_VERSION_INT;
}
+
/* Return the build-time configure command line of this libavcodec. */
const char *avcodec_configuration(void)
{
    return FFMPEG_CONFIGURATION;
}
+
/* Return the license string of this libavcodec. The two string literals are
 * concatenated at compile time; adding sizeof(LICENSE_PREFIX) - 1 skips the
 * prefix so only the license name itself is returned. */
const char *avcodec_license(void)
{
#define LICENSE_PREFIX "libavcodec license: "
    return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
}
+
/* Reset the decoding/encoding state of an opened context (e.g. after a
 * seek): clear draining flags and buffered frame/packet, flush the codec
 * or its frame threads, and invalidate the pts correction history. */
void avcodec_flush_buffers(AVCodecContext *avctx)
{
    avctx->internal->draining = 0;
    avctx->internal->draining_done = 0;
    av_frame_unref(avctx->internal->buffer_frame);
    av_packet_unref(avctx->internal->buffer_pkt);
    avctx->internal->buffer_pkt_valid = 0;

    /* Frame-threaded decoders flush through the thread layer so every
     * worker context is reset, not just the user-visible one. */
    if (HAVE_THREADS && avctx->active_thread_type & FF_THREAD_FRAME)
        ff_thread_flush(avctx);
    else if (avctx->codec->flush)
        avctx->codec->flush(avctx);

    avctx->pts_correction_last_pts =
    avctx->pts_correction_last_dts = INT64_MIN;

    if (!avctx->refcounted_frames)
        av_frame_unref(avctx->internal->to_free);
}
+
/* Return the exact number of coded bits per sample for codecs whose bit
 * depth is a fixed property of the format, or 0 when it is not constant. */
int av_get_exact_bits_per_sample(enum AVCodecID codec_id)
{
    switch (codec_id) {
    /* 4-bit ADPCM/exponential families */
    case AV_CODEC_ID_8SVX_EXP:
    case AV_CODEC_ID_8SVX_FIB:
    case AV_CODEC_ID_ADPCM_CT:
    case AV_CODEC_ID_ADPCM_IMA_APC:
    case AV_CODEC_ID_ADPCM_IMA_EA_SEAD:
    case AV_CODEC_ID_ADPCM_IMA_OKI:
    case AV_CODEC_ID_ADPCM_IMA_WS:
    case AV_CODEC_ID_ADPCM_G722:
    case AV_CODEC_ID_ADPCM_YAMAHA:
    case AV_CODEC_ID_ADPCM_AICA:
        return 4;
    /* 8-bit PCM, companded PCM and DSD bitstream variants */
    case AV_CODEC_ID_DSD_LSBF:
    case AV_CODEC_ID_DSD_MSBF:
    case AV_CODEC_ID_DSD_LSBF_PLANAR:
    case AV_CODEC_ID_DSD_MSBF_PLANAR:
    case AV_CODEC_ID_PCM_ALAW:
    case AV_CODEC_ID_PCM_MULAW:
    case AV_CODEC_ID_PCM_S8:
    case AV_CODEC_ID_PCM_S8_PLANAR:
    case AV_CODEC_ID_PCM_U8:
    case AV_CODEC_ID_PCM_ZORK:
    case AV_CODEC_ID_SDX2_DPCM:
        return 8;
    case AV_CODEC_ID_PCM_S16BE:
    case AV_CODEC_ID_PCM_S16BE_PLANAR:
    case AV_CODEC_ID_PCM_S16LE:
    case AV_CODEC_ID_PCM_S16LE_PLANAR:
    case AV_CODEC_ID_PCM_U16BE:
    case AV_CODEC_ID_PCM_U16LE:
        return 16;
    case AV_CODEC_ID_PCM_S24DAUD:
    case AV_CODEC_ID_PCM_S24BE:
    case AV_CODEC_ID_PCM_S24LE:
    case AV_CODEC_ID_PCM_S24LE_PLANAR:
    case AV_CODEC_ID_PCM_U24BE:
    case AV_CODEC_ID_PCM_U24LE:
        return 24;
    case AV_CODEC_ID_PCM_S32BE:
    case AV_CODEC_ID_PCM_S32LE:
    case AV_CODEC_ID_PCM_S32LE_PLANAR:
    case AV_CODEC_ID_PCM_U32BE:
    case AV_CODEC_ID_PCM_U32LE:
    case AV_CODEC_ID_PCM_F32BE:
    case AV_CODEC_ID_PCM_F32LE:
        return 32;
    case AV_CODEC_ID_PCM_F64BE:
    case AV_CODEC_ID_PCM_F64LE:
        return 64;
    default:
        return 0;
    }
}
+
/* Map a sample format to the matching raw PCM codec id.
 * @param be 1 = big endian, 0 = little endian; any other value selects the
 *           native endianness via AV_NE(). Planar formats map to the same
 *           codec ids as their interleaved counterparts. */
enum AVCodecID av_get_pcm_codec(enum AVSampleFormat fmt, int be)
{
    /* [format][endianness] lookup table; entries left out default to
     * AV_CODEC_ID_NONE (0) through static initialization. */
    static const enum AVCodecID map[AV_SAMPLE_FMT_NB][2] = {
        [AV_SAMPLE_FMT_U8  ] = { AV_CODEC_ID_PCM_U8,    AV_CODEC_ID_PCM_U8    },
        [AV_SAMPLE_FMT_S16 ] = { AV_CODEC_ID_PCM_S16LE, AV_CODEC_ID_PCM_S16BE },
        [AV_SAMPLE_FMT_S32 ] = { AV_CODEC_ID_PCM_S32LE, AV_CODEC_ID_PCM_S32BE },
        [AV_SAMPLE_FMT_FLT ] = { AV_CODEC_ID_PCM_F32LE, AV_CODEC_ID_PCM_F32BE },
        [AV_SAMPLE_FMT_DBL ] = { AV_CODEC_ID_PCM_F64LE, AV_CODEC_ID_PCM_F64BE },
        [AV_SAMPLE_FMT_U8P ] = { AV_CODEC_ID_PCM_U8,    AV_CODEC_ID_PCM_U8    },
        [AV_SAMPLE_FMT_S16P] = { AV_CODEC_ID_PCM_S16LE, AV_CODEC_ID_PCM_S16BE },
        [AV_SAMPLE_FMT_S32P] = { AV_CODEC_ID_PCM_S32LE, AV_CODEC_ID_PCM_S32BE },
        [AV_SAMPLE_FMT_FLTP] = { AV_CODEC_ID_PCM_F32LE, AV_CODEC_ID_PCM_F32BE },
        [AV_SAMPLE_FMT_DBLP] = { AV_CODEC_ID_PCM_F64LE, AV_CODEC_ID_PCM_F64BE },
    };
    if (fmt < 0 || fmt >= AV_SAMPLE_FMT_NB)
        return AV_CODEC_ID_NONE;
    if (be < 0 || be > 1)
        be = AV_NE(1, 0);
    return map[fmt][be];
}
+
+int av_get_bits_per_sample(enum AVCodecID codec_id)
+{
+ switch (codec_id) {
+ case AV_CODEC_ID_ADPCM_SBPRO_2:
+ return 2;
+ case AV_CODEC_ID_ADPCM_SBPRO_3:
+ return 3;
+ case AV_CODEC_ID_ADPCM_SBPRO_4:
+ case AV_CODEC_ID_ADPCM_IMA_WAV:
+ case AV_CODEC_ID_ADPCM_IMA_QT:
+ case AV_CODEC_ID_ADPCM_SWF:
+ case AV_CODEC_ID_ADPCM_MS:
+ return 4;
+ default:
+ return av_get_exact_bits_per_sample(codec_id);
+ }
+}
+
/**
 * Compute the duration, in samples (per channel), of one packet of
 * compressed audio. Tries increasingly indirect derivations: exact bits
 * per sample, fixed per-codec packet durations, sample rate, block align,
 * frame size in bytes, and finally a CBR estimate from the bit rate.
 *
 * @return the duration in samples, or 0 when it cannot be determined.
 */
static int get_audio_frame_duration(enum AVCodecID id, int sr, int ch, int ba,
                                    uint32_t tag, int bits_per_coded_sample, int64_t bitrate,
                                    uint8_t * extradata, int frame_size, int frame_bytes)
{
    int bps = av_get_exact_bits_per_sample(id);
    /* Number of whole block_align-sized blocks in this packet (>= 1). */
    int framecount = (ba > 0 && frame_bytes / ba > 0) ? frame_bytes / ba : 1;

    /* codecs with an exact constant bits per sample */
    if (bps > 0 && ch > 0 && frame_bytes > 0 && ch < 32768 && bps < 32768)
        return (frame_bytes * 8LL) / (bps * ch);
    bps = bits_per_coded_sample;

    /* codecs with a fixed packet duration */
    switch (id) {
    case AV_CODEC_ID_ADPCM_ADX:    return   32;
    case AV_CODEC_ID_ADPCM_IMA_QT: return   64;
    case AV_CODEC_ID_ADPCM_EA_XAS: return  128;
    case AV_CODEC_ID_AMR_NB:
    case AV_CODEC_ID_EVRC:
    case AV_CODEC_ID_GSM:
    case AV_CODEC_ID_QCELP:
    case AV_CODEC_ID_RA_288:       return  160;
    case AV_CODEC_ID_AMR_WB:
    case AV_CODEC_ID_GSM_MS:       return  320;
    case AV_CODEC_ID_MP1:          return  384;
    case AV_CODEC_ID_ATRAC1:       return  512;
    case AV_CODEC_ID_ATRAC3:       return 1024 * framecount;
    case AV_CODEC_ID_ATRAC3P:      return 2048;
    case AV_CODEC_ID_MP2:
    case AV_CODEC_ID_MUSEPACK7:    return 1152;
    case AV_CODEC_ID_AC3:          return 1536;
    }

    if (sr > 0) {
        /* calc from sample rate */
        if (id == AV_CODEC_ID_TTA)
            return 256 * sr / 245;
        else if (id == AV_CODEC_ID_DST)
            return 588 * sr / 44100;

        if (ch > 0) {
            /* calc from sample rate and channels */
            if (id == AV_CODEC_ID_BINKAUDIO_DCT)
                return (480 << (sr / 22050)) / ch;
        }
    }

    if (ba > 0) {
        /* calc from block_align */
        if (id == AV_CODEC_ID_SIPR) {
            switch (ba) {
            case 20: return 160;
            case 19: return 144;
            case 29: return 288;
            case 37: return 480;
            }
        } else if (id == AV_CODEC_ID_ILBC) {
            switch (ba) {
            case 38: return 160;
            case 50: return 240;
            }
        }
    }

    if (frame_bytes > 0) {
        /* calc from frame_bytes only */
        if (id == AV_CODEC_ID_TRUESPEECH)
            return 240 * (frame_bytes / 32);
        if (id == AV_CODEC_ID_NELLYMOSER)
            return 256 * (frame_bytes / 64);
        if (id == AV_CODEC_ID_RA_144)
            return 160 * (frame_bytes / 20);
        if (id == AV_CODEC_ID_G723_1)
            return 240 * (frame_bytes / 24);

        if (bps > 0) {
            /* calc from frame_bytes and bits_per_coded_sample */
            if (id == AV_CODEC_ID_ADPCM_G726)
                return frame_bytes * 8 / bps;
        }

        /* ch < INT_MAX/16 guards the multiplications below against overflow */
        if (ch > 0 && ch < INT_MAX/16) {
            /* calc from frame_bytes and channels */
            switch (id) {
            case AV_CODEC_ID_ADPCM_AFC:
                return frame_bytes / (9 * ch) * 16;
            case AV_CODEC_ID_ADPCM_PSX:
            case AV_CODEC_ID_ADPCM_DTK:
                return frame_bytes / (16 * ch) * 28;
            case AV_CODEC_ID_ADPCM_4XM:
            case AV_CODEC_ID_ADPCM_IMA_DAT4:
            case AV_CODEC_ID_ADPCM_IMA_ISS:
                return (frame_bytes - 4 * ch) * 2 / ch;
            case AV_CODEC_ID_ADPCM_IMA_SMJPEG:
                return (frame_bytes - 4) * 2 / ch;
            case AV_CODEC_ID_ADPCM_IMA_AMV:
                return (frame_bytes - 8) * 2 / ch;
            case AV_CODEC_ID_ADPCM_THP:
            case AV_CODEC_ID_ADPCM_THP_LE:
                if (extradata)
                    return frame_bytes * 14 / (8 * ch);
                break;
            case AV_CODEC_ID_ADPCM_XA:
                return (frame_bytes / 128) * 224 / ch;
            case AV_CODEC_ID_INTERPLAY_DPCM:
                return (frame_bytes - 6 - ch) / ch;
            case AV_CODEC_ID_ROQ_DPCM:
                return (frame_bytes - 8) / ch;
            case AV_CODEC_ID_XAN_DPCM:
                return (frame_bytes - 2 * ch) / ch;
            case AV_CODEC_ID_MACE3:
                return 3 * frame_bytes / ch;
            case AV_CODEC_ID_MACE6:
                return 6 * frame_bytes / ch;
            case AV_CODEC_ID_PCM_LXF:
                return 2 * (frame_bytes / (5 * ch));
            case AV_CODEC_ID_IAC:
            case AV_CODEC_ID_IMC:
                return 4 * frame_bytes / ch;
            }

            if (tag) {
                /* calc from frame_bytes, channels, and codec_tag */
                if (id == AV_CODEC_ID_SOL_DPCM) {
                    if (tag == 3)
                        return frame_bytes / ch;
                    else
                        return frame_bytes * 2 / ch;
                }
            }

            if (ba > 0) {
                /* calc from frame_bytes, channels, and block_align */
                int blocks = frame_bytes / ba;
                switch (id) {
                case AV_CODEC_ID_ADPCM_IMA_WAV:
                    if (bps < 2 || bps > 5)
                        return 0;
                    return blocks * (1 + (ba - 4 * ch) / (bps * ch) * 8);
                case AV_CODEC_ID_ADPCM_IMA_DK3:
                    return blocks * (((ba - 16) * 2 / 3 * 4) / ch);
                case AV_CODEC_ID_ADPCM_IMA_DK4:
                    return blocks * (1 + (ba - 4 * ch) * 2 / ch);
                case AV_CODEC_ID_ADPCM_IMA_RAD:
                    return blocks * ((ba - 4 * ch) * 2 / ch);
                case AV_CODEC_ID_ADPCM_MS:
                    return blocks * (2 + (ba - 7 * ch) * 2 / ch);
                case AV_CODEC_ID_ADPCM_MTAF:
                    return blocks * (ba - 16) * 2 / ch;
                }
            }

            if (bps > 0) {
                /* calc from frame_bytes, channels, and bits_per_coded_sample */
                switch (id) {
                case AV_CODEC_ID_PCM_DVD:
                    if(bps<4)
                        return 0;
                    return 2 * (frame_bytes / ((bps * 2 / 8) * ch));
                case AV_CODEC_ID_PCM_BLURAY:
                    if(bps<4)
                        return 0;
                    return frame_bytes / ((FFALIGN(ch, 2) * bps) / 8);
                case AV_CODEC_ID_S302M:
                    return 2 * (frame_bytes / ((bps + 4) / 4)) / ch;
                }
            }
        }
    }

    /* Fall back on using frame_size */
    if (frame_size > 1 && frame_bytes)
        return frame_size;

    //For WMA we currently have no other means to calculate duration thus we
    //do it here by assuming CBR, which is true for all known cases.
    if (bitrate > 0 && frame_bytes > 0 && sr > 0 && ba > 1) {
        if (id == AV_CODEC_ID_WMAV1 || id == AV_CODEC_ID_WMAV2)
            return (frame_bytes * 8LL * sr) / bitrate;
    }

    return 0;
}
+
/* Duration, in samples, of frame_bytes of compressed audio described by a
 * codec context. Thin wrapper around get_audio_frame_duration(). */
int av_get_audio_frame_duration(AVCodecContext *avctx, int frame_bytes)
{
    return get_audio_frame_duration(avctx->codec_id, avctx->sample_rate,
                                    avctx->channels, avctx->block_align,
                                    avctx->codec_tag, avctx->bits_per_coded_sample,
                                    avctx->bit_rate, avctx->extradata, avctx->frame_size,
                                    frame_bytes);
}
+
/* Same as av_get_audio_frame_duration(), taking AVCodecParameters instead
 * of a full codec context. */
int av_get_audio_frame_duration2(AVCodecParameters *par, int frame_bytes)
{
    return get_audio_frame_duration(par->codec_id, par->sample_rate,
                                    par->channels, par->block_align,
                                    par->codec_tag, par->bits_per_coded_sample,
                                    par->bit_rate, par->extradata, par->frame_size,
                                    frame_bytes);
}
+
#if !HAVE_THREADS
/* Threading disabled at build time: initializing threads always fails. */
int ff_thread_init(AVCodecContext *s)
{
    return -1;
}

#endif
+
/* Encode value v using Xiph-style lacing into s: a run of 0xff bytes, one
 * per full 255 contained in v, followed by the remainder byte.
 * Returns the number of bytes written (the caller must provide v/255 + 1). */
unsigned int av_xiphlacing(unsigned char *s, unsigned int v)
{
    unsigned int n;

    for (n = 0; v >= 0xff; n++, v -= 0xff)
        *s++ = 0xff;
    *s = v;
    return n + 1;
}
+
/* Linear-search a table of uint16_t pairs for (a, b).
 * Returns the index of the first match, or `size` when none matches. */
int ff_match_2uint16(const uint16_t(*tab)[2], int size, int a, int b)
{
    int i = 0;

    while (i < size) {
        if (tab[i][0] == a && tab[i][1] == b)
            break;
        i++;
    }
    return i;
}
+
#if FF_API_MISSING_SAMPLE
FF_DISABLE_DEPRECATION_WARNINGS
/* Deprecated public helper (kept while FF_API_MISSING_SAMPLE is set):
 * log that `feature` is unimplemented and optionally ask for a sample. */
void av_log_missing_feature(void *avc, const char *feature, int want_sample)
{
    av_log(avc, AV_LOG_WARNING, "%s is not implemented. Update your FFmpeg "
           "version to the newest one from Git. If the problem still "
           "occurs, it means that your file has a feature which has not "
           "been implemented.\n", feature);
    if(want_sample)
        av_log_ask_for_sample(avc, NULL);
}

/* Deprecated public helper: log an optional printf-style message followed
 * by the standard "please upload a sample" notice. */
void av_log_ask_for_sample(void *avc, const char *msg, ...)
{
    va_list argument_list;

    va_start(argument_list, msg);

    if (msg)
        av_vlog(avc, AV_LOG_WARNING, msg, argument_list);
    av_log(avc, AV_LOG_WARNING, "If you want to help, upload a sample "
           "of this file to ftp://upload.ffmpeg.org/incoming/ "
           "and contact the ffmpeg-devel mailing list. (ffmpeg-devel@ffmpeg.org)\n");

    va_end(argument_list);
}
FF_ENABLE_DEPRECATION_WARNINGS
#endif /* FF_API_MISSING_SAMPLE */
+
/* Head of the singly-linked list of registered hwaccels, and a cached tail
 * pointer used as a starting hint for registration. */
static AVHWAccel *first_hwaccel = NULL;
static AVHWAccel **last_hwaccel = &first_hwaccel;

/* Append a hwaccel to the global list. Publication is lock-free: the CAS
 * only succeeds on a NULL next pointer; on contention we walk forward and
 * retry at the new tail. last_hwaccel is a hint, not a synchronized value. */
void av_register_hwaccel(AVHWAccel *hwaccel)
{
    AVHWAccel **p = last_hwaccel;
    hwaccel->next = NULL;
    while(*p || avpriv_atomic_ptr_cas((void * volatile *)p, NULL, hwaccel))
        p = &(*p)->next;
    last_hwaccel = &hwaccel->next;
}

/* Iterate over registered hwaccels; passing NULL returns the list head. */
AVHWAccel *av_hwaccel_next(const AVHWAccel *hwaccel)
{
    return hwaccel ? hwaccel->next : first_hwaccel;
}
+
+int av_lockmgr_register(int (*cb)(void **mutex, enum AVLockOp op))
+{
+ if (lockmgr_cb) {
+ // There is no good way to rollback a failure to destroy the
+ // mutex, so we ignore failures.
+ lockmgr_cb(&codec_mutex, AV_LOCK_DESTROY);
+ lockmgr_cb(&avformat_mutex, AV_LOCK_DESTROY);
+ lockmgr_cb = NULL;
+ codec_mutex = NULL;
+ avformat_mutex = NULL;
+ }
+
+ if (cb) {
+ void *new_codec_mutex = NULL;
+ void *new_avformat_mutex = NULL;
+ int err;
+ if (err = cb(&new_codec_mutex, AV_LOCK_CREATE)) {
+ return err > 0 ? AVERROR_UNKNOWN : err;
+ }
+ if (err = cb(&new_avformat_mutex, AV_LOCK_CREATE)) {
+ // Ignore failures to destroy the newly created mutex.
+ cb(&new_codec_mutex, AV_LOCK_DESTROY);
+ return err > 0 ? AVERROR_UNKNOWN : err;
+ }
+ lockmgr_cb = cb;
+ codec_mutex = new_codec_mutex;
+ avformat_mutex = new_avformat_mutex;
+ }
+
+ return 0;
+}
+
/* Take the global codec lock around a non-thread-safe codec init.
 * Codecs marked FF_CODEC_CAP_INIT_THREADSAFE (or with no init at all)
 * skip locking entirely. The entangled_thread_counter check detects
 * concurrent avcodec_open2() calls made without a lock manager. */
int ff_lock_avcodec(AVCodecContext *log_ctx, const AVCodec *codec)
{
    if (codec->caps_internal & FF_CODEC_CAP_INIT_THREADSAFE || !codec->init)
        return 0;

    if (lockmgr_cb) {
        if ((*lockmgr_cb)(&codec_mutex, AV_LOCK_OBTAIN))
            return -1;
    }

    if (avpriv_atomic_int_add_and_fetch(&entangled_thread_counter, 1) != 1) {
        av_log(log_ctx, AV_LOG_ERROR,
               "Insufficient thread locking. At least %d threads are "
               "calling avcodec_open2() at the same time right now.\n",
               entangled_thread_counter);
        if (!lockmgr_cb)
            av_log(log_ctx, AV_LOG_ERROR, "No lock manager is set, please see av_lockmgr_register()\n");
        /* Mark locked so ff_unlock_avcodec() can undo the counter/mutex. */
        ff_avcodec_locked = 1;
        ff_unlock_avcodec(codec);
        return AVERROR(EINVAL);
    }
    av_assert0(!ff_avcodec_locked);
    ff_avcodec_locked = 1;
    return 0;
}
+
/* Release the global codec lock taken by ff_lock_avcodec(); must mirror its
 * skip condition so lock/unlock pairs stay balanced per codec. */
int ff_unlock_avcodec(const AVCodec *codec)
{
    if (codec->caps_internal & FF_CODEC_CAP_INIT_THREADSAFE || !codec->init)
        return 0;

    av_assert0(ff_avcodec_locked);
    ff_avcodec_locked = 0;
    avpriv_atomic_int_add_and_fetch(&entangled_thread_counter, -1);
    if (lockmgr_cb) {
        if ((*lockmgr_cb)(&codec_mutex, AV_LOCK_RELEASE))
            return -1;
    }

    return 0;
}
+
+int avpriv_lock_avformat(void)
+{
+ if (lockmgr_cb) {
+ if ((*lockmgr_cb)(&avformat_mutex, AV_LOCK_OBTAIN))
+ return -1;
+ }
+ return 0;
+}
+
+int avpriv_unlock_avformat(void)
+{
+ if (lockmgr_cb) {
+ if ((*lockmgr_cb)(&avformat_mutex, AV_LOCK_RELEASE))
+ return -1;
+ }
+ return 0;
+}
+
/* Uppercase each of the four bytes of a fourcc value independently. */
unsigned int avpriv_toupper4(unsigned int x)
{
    unsigned int r;

    r  = (unsigned)av_toupper( x        & 0xFF);
    r |= (unsigned)av_toupper((x >>  8) & 0xFF) <<  8;
    r |= (unsigned)av_toupper((x >> 16) & 0xFF) << 16;
    r |= (unsigned)av_toupper((x >> 24) & 0xFF) << 24;
    return r;
}
+
/* Make dst a new reference to src's frame and progress buffer.
 * On failure to ref the progress buffer the frame reference is released
 * again so dst is left clean. Returns 0 or a negative AVERROR. */
int ff_thread_ref_frame(ThreadFrame *dst, ThreadFrame *src)
{
    int ret;

    dst->owner = src->owner;

    ret = av_frame_ref(dst->f, src->f);
    if (ret < 0)
        return ret;

    /* dst must not already carry a progress buffer, or it would leak. */
    av_assert0(!dst->progress);

    if (src->progress &&
        !(dst->progress = av_buffer_ref(src->progress))) {
        ff_thread_release_buffer(dst->owner, dst);
        return AVERROR(ENOMEM);
    }

    return 0;
}
+
#if !HAVE_THREADS

/* Single-threaded fallbacks for the frame/slice-threading API: format and
 * buffer requests go straight to the context, progress reporting and
 * synchronization become no-ops. */

enum AVPixelFormat ff_thread_get_format(AVCodecContext *avctx, const enum AVPixelFormat *fmt)
{
    return ff_get_format(avctx, fmt);
}

int ff_thread_get_buffer(AVCodecContext *avctx, ThreadFrame *f, int flags)
{
    f->owner = avctx;
    return ff_get_buffer(avctx, f->f, flags);
}

void ff_thread_release_buffer(AVCodecContext *avctx, ThreadFrame *f)
{
    if (f->f)
        av_frame_unref(f->f);
}

void ff_thread_finish_setup(AVCodecContext *avctx)
{
}

void ff_thread_report_progress(ThreadFrame *f, int progress, int field)
{
}

void ff_thread_await_progress(ThreadFrame *f, int progress, int field)
{
}

/* Without threads a frame can always be started immediately. */
int ff_thread_can_start_frame(AVCodecContext *avctx)
{
    return 1;
}

int ff_alloc_entries(AVCodecContext *avctx, int count)
{
    return 0;
}

void ff_reset_entries(AVCodecContext *avctx)
{
}

void ff_thread_await_progress2(AVCodecContext *avctx, int field, int thread, int shift)
{
}

void ff_thread_report_progress2(AVCodecContext *avctx, int field, int thread, int n)
{
}

#endif
+
+int avcodec_is_open(AVCodecContext *s)
+{
+ return !!s->internal;
+}
+
/* Finalize a bprint buffer and install its contents as avctx->extradata.
 * Ownership of the finalized string transfers to the context on success.
 * Returns 0, or a negative AVERROR (ENOMEM when the bprint was truncated). */
int avpriv_bprint_to_extradata(AVCodecContext *avctx, struct AVBPrint *buf)
{
    int ret;
    char *str;

    ret = av_bprint_finalize(buf, &str);
    if (ret < 0)
        return ret;
    /* A truncated bprint means the data is incomplete: reject it. */
    if (!av_bprint_is_complete(buf)) {
        av_free(str);
        return AVERROR(ENOMEM);
    }

    avctx->extradata = str;
    /* Note: the string is NUL terminated (so extradata can be read as a
     * string), but the ending character is not accounted in the size (in
     * binary formats you are likely not supposed to mux that character). When
     * extradata is copied, it is also padded with AV_INPUT_BUFFER_PADDING_SIZE
     * zeros. */
    avctx->extradata_size = buf->len;
    return 0;
}
+
/* Scan [p, end) for an MPEG-style 00 00 01 start code, carrying the last
 * four bytes seen across calls in *state. Returns a pointer just past the
 * start code (with *state holding it), or `end` if none was found. */
const uint8_t *avpriv_find_start_code(const uint8_t *av_restrict p,
                                      const uint8_t *end,
                                      uint32_t *av_restrict state)
{
    int i;

    av_assert0(p <= end);
    if (p >= end)
        return end;

    /* First feed up to 3 bytes through *state so a start code spanning the
     * previous buffer boundary is still detected. */
    for (i = 0; i < 3; i++) {
        uint32_t tmp = *state << 8;
        *state = tmp + *(p++);
        if (tmp == 0x100 || p == end)
            return p;
    }

    /* Fast scan: inspect every third byte and step by how many positions
     * it rules out; only a full 00 00 01 at p-3..p-1 falls through to the
     * match branch. */
    while (p < end) {
        if      (p[-1] > 1      ) p += 3;
        else if (p[-2]          ) p += 2;
        else if (p[-3]|(p[-1]-1)) p++;
        else {
            p++;
            break;
        }
    }

    /* Re-read the 4 bytes ending at the (possible) match into *state;
     * FFMIN clamps the overshoot of the stride-3 scan. */
    p = FFMIN(p, end) - 4;
    *state = AV_RB32(p);

    return p + 4;
}
+
+AVCPBProperties *av_cpb_properties_alloc(size_t *size)
+{
+ AVCPBProperties *props = av_mallocz(sizeof(AVCPBProperties));
+ if (!props)
+ return NULL;
+
+ if (size)
+ *size = sizeof(*props);
+
+ props->vbv_delay = UINT64_MAX;
+
+ return props;
+}
+
+AVCPBProperties *ff_add_cpb_side_data(AVCodecContext *avctx)
+{
+ AVPacketSideData *tmp;
+ AVCPBProperties *props;
+ size_t size;
+
+ props = av_cpb_properties_alloc(&size);
+ if (!props)
+ return NULL;
+
+ tmp = av_realloc_array(avctx->coded_side_data, avctx->nb_coded_side_data + 1, sizeof(*tmp));
+ if (!tmp) {
+ av_freep(&props);
+ return NULL;
+ }
+
+ avctx->coded_side_data = tmp;
+ avctx->nb_coded_side_data++;
+
+ avctx->coded_side_data[avctx->nb_coded_side_data - 1].type = AV_PKT_DATA_CPB_PROPERTIES;
+ avctx->coded_side_data[avctx->nb_coded_side_data - 1].data = (uint8_t*)props;
+ avctx->coded_side_data[avctx->nb_coded_side_data - 1].size = size;
+
+ return props;
+}
+
/* Free owned buffers and restore every field of an AVCodecParameters to its
 * documented "unset" default. */
static void codec_parameters_reset(AVCodecParameters *par)
{
    av_freep(&par->extradata);

    memset(par, 0, sizeof(*par));

    /* Fields whose "unset" value is not all-zero. */
    par->codec_type          = AVMEDIA_TYPE_UNKNOWN;
    par->codec_id            = AV_CODEC_ID_NONE;
    par->format              = -1;
    par->field_order         = AV_FIELD_UNKNOWN;
    par->color_range         = AVCOL_RANGE_UNSPECIFIED;
    par->color_primaries     = AVCOL_PRI_UNSPECIFIED;
    par->color_trc           = AVCOL_TRC_UNSPECIFIED;
    par->color_space         = AVCOL_SPC_UNSPECIFIED;
    par->chroma_location     = AVCHROMA_LOC_UNSPECIFIED;
    par->sample_aspect_ratio = (AVRational){ 0, 1 };
    par->profile             = FF_PROFILE_UNKNOWN;
    par->level               = FF_LEVEL_UNKNOWN;
}
+
+AVCodecParameters *avcodec_parameters_alloc(void)
+{
+ AVCodecParameters *par = av_mallocz(sizeof(*par));
+
+ if (!par)
+ return NULL;
+ codec_parameters_reset(par);
+ return par;
+}
+
+void avcodec_parameters_free(AVCodecParameters **ppar)
+{
+ AVCodecParameters *par = *ppar;
+
+ if (!par)
+ return;
+ codec_parameters_reset(par);
+
+ av_freep(ppar);
+}
+
/* Deep-copy codec parameters from src to dst. dst is reset first (freeing
 * any extradata it owned); extradata is duplicated with the required
 * zeroed AV_INPUT_BUFFER_PADDING_SIZE tail. Returns 0 or AVERROR(ENOMEM). */
int avcodec_parameters_copy(AVCodecParameters *dst, const AVCodecParameters *src)
{
    codec_parameters_reset(dst);
    memcpy(dst, src, sizeof(*dst));

    /* The memcpy aliased src's extradata pointer; replace it with a copy. */
    dst->extradata = NULL;
    dst->extradata_size = 0;
    if (src->extradata) {
        dst->extradata = av_mallocz(src->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
        if (!dst->extradata)
            return AVERROR(ENOMEM);
        memcpy(dst->extradata, src->extradata, src->extradata_size);
        dst->extradata_size = src->extradata_size;
    }

    return 0;
}
+
/* Fill codec parameters from an AVCodecContext: copy the common fields,
 * then the per-media-type ones, and duplicate extradata (padded with
 * AV_INPUT_BUFFER_PADDING_SIZE zeros). Returns 0 or AVERROR(ENOMEM). */
int avcodec_parameters_from_context(AVCodecParameters *par,
                                    const AVCodecContext *codec)
{
    codec_parameters_reset(par);

    par->codec_type = codec->codec_type;
    par->codec_id   = codec->codec_id;
    par->codec_tag  = codec->codec_tag;

    par->bit_rate              = codec->bit_rate;
    par->bits_per_coded_sample = codec->bits_per_coded_sample;
    par->bits_per_raw_sample   = codec->bits_per_raw_sample;
    par->profile               = codec->profile;
    par->level                 = codec->level;

    switch (par->codec_type) {
    case AVMEDIA_TYPE_VIDEO:
        par->format              = codec->pix_fmt;
        par->width               = codec->width;
        par->height              = codec->height;
        par->field_order         = codec->field_order;
        par->color_range         = codec->color_range;
        par->color_primaries     = codec->color_primaries;
        par->color_trc           = codec->color_trc;
        par->color_space         = codec->colorspace;
        par->chroma_location     = codec->chroma_sample_location;
        par->sample_aspect_ratio = codec->sample_aspect_ratio;
        par->video_delay         = codec->has_b_frames;
        break;
    case AVMEDIA_TYPE_AUDIO:
        par->format          = codec->sample_fmt;
        par->channel_layout  = codec->channel_layout;
        par->channels        = codec->channels;
        par->sample_rate     = codec->sample_rate;
        par->block_align     = codec->block_align;
        par->frame_size      = codec->frame_size;
        par->initial_padding = codec->initial_padding;
        par->seek_preroll    = codec->seek_preroll;
        break;
    case AVMEDIA_TYPE_SUBTITLE:
        par->width  = codec->width;
        par->height = codec->height;
        break;
    }

    if (codec->extradata) {
        par->extradata = av_mallocz(codec->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
        if (!par->extradata)
            return AVERROR(ENOMEM);
        memcpy(par->extradata, codec->extradata, codec->extradata_size);
        par->extradata_size = codec->extradata_size;
    }

    return 0;
}
+
/* Apply codec parameters to an AVCodecContext — the inverse of
 * avcodec_parameters_from_context(). Existing extradata is replaced by a
 * padded copy. Returns 0 or AVERROR(ENOMEM). */
int avcodec_parameters_to_context(AVCodecContext *codec,
                                  const AVCodecParameters *par)
{
    codec->codec_type = par->codec_type;
    codec->codec_id   = par->codec_id;
    codec->codec_tag  = par->codec_tag;

    codec->bit_rate              = par->bit_rate;
    codec->bits_per_coded_sample = par->bits_per_coded_sample;
    codec->bits_per_raw_sample   = par->bits_per_raw_sample;
    codec->profile               = par->profile;
    codec->level                 = par->level;

    switch (par->codec_type) {
    case AVMEDIA_TYPE_VIDEO:
        codec->pix_fmt                = par->format;
        codec->width                  = par->width;
        codec->height                 = par->height;
        codec->field_order            = par->field_order;
        codec->color_range            = par->color_range;
        codec->color_primaries        = par->color_primaries;
        codec->color_trc              = par->color_trc;
        codec->colorspace             = par->color_space;
        codec->chroma_sample_location = par->chroma_location;
        codec->sample_aspect_ratio    = par->sample_aspect_ratio;
        codec->has_b_frames           = par->video_delay;
        break;
    case AVMEDIA_TYPE_AUDIO:
        codec->sample_fmt       = par->format;
        codec->channel_layout   = par->channel_layout;
        codec->channels         = par->channels;
        codec->sample_rate      = par->sample_rate;
        codec->block_align      = par->block_align;
        codec->frame_size       = par->frame_size;
        /* delay mirrors initial_padding for audio. */
        codec->delay            =
        codec->initial_padding  = par->initial_padding;
        codec->seek_preroll     = par->seek_preroll;
        break;
    case AVMEDIA_TYPE_SUBTITLE:
        codec->width  = par->width;
        codec->height = par->height;
        break;
    }

    if (par->extradata) {
        av_freep(&codec->extradata);
        codec->extradata = av_mallocz(par->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
        if (!codec->extradata)
            return AVERROR(ENOMEM);
        memcpy(codec->extradata, par->extradata, par->extradata_size);
        codec->extradata_size = par->extradata_size;
    }

    return 0;
}
+
/**
 * Build an A/53 closed-caption SEI payload from a frame's A53_CC side data.
 * On success *data holds prefix_len reserved bytes followed by the payload
 * and *sei_size its size (excluding the prefix); the caller owns *data.
 * When the frame carries no caption side data, *data is set to NULL and 0
 * is returned (and *sei_size is left untouched).
 */
int ff_alloc_a53_sei(const AVFrame *frame, size_t prefix_len,
                     void **data, size_t *sei_size)
{
    AVFrameSideData *side_data = NULL;
    uint8_t *sei_data;

    if (frame)
        side_data = av_frame_get_side_data(frame, AV_FRAME_DATA_A53_CC);

    if (!side_data) {
        *data = NULL;
        return 0;
    }

    /* 10 bytes of header below plus a trailing marker byte. */
    *sei_size = side_data->size + 11;
    *data = av_mallocz(*sei_size + prefix_len);
    if (!*data)
        return AVERROR(ENOMEM);
    sei_data = (uint8_t*)*data + prefix_len;

    // country code
    /* NOTE(review): 181 appears to be the ITU-T T.35 country code for the
     * USA, with the following two bytes as the provider code — confirm
     * against ATSC A/53 / SCTE 128. */
    sei_data[0] = 181;
    sei_data[1] = 0;
    sei_data[2] = 49;

    /**
     * 'GA94' is standard in North America for ATSC, but hard coding
     * this style may not be the right thing to do -- other formats
     * do exist. This information is not available in the side_data
     * so we are going with this right now.
     */
    AV_WL32(sei_data + 3, MKTAG('G', 'A', '9', '4'));
    /* user_data_type_code = 3 (caption data), then cc_count | flags. */
    sei_data[7] = 3;
    sei_data[8] = ((side_data->size/3) & 0x1f) | 0x40;
    sei_data[9] = 0;

    memcpy(sei_data + 10, side_data->data, side_data->size);

    /* trailing marker_bits byte */
    sei_data[side_data->size+10] = 255;

    return 0;
}
diff --git a/media/ffvpx/libavcodec/version.h b/media/ffvpx/libavcodec/version.h
new file mode 100644
index 000000000..4f6423b15
--- /dev/null
+++ b/media/ffvpx/libavcodec/version.h
@@ -0,0 +1,230 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VERSION_H
+#define AVCODEC_VERSION_H
+
+/**
+ * @file
+ * @ingroup libavc
+ * Libavcodec version macros.
+ */
+
+#include "libavutil/version.h"
+
+/* Version of the bundled libavcodec (57.48.101). The combined integer,
+ * token and string forms below are derived from these three components and
+ * also gate the FF_API_* deprecation guards further down in this header. */
+#define LIBAVCODEC_VERSION_MAJOR 57
+#define LIBAVCODEC_VERSION_MINOR 48
+#define LIBAVCODEC_VERSION_MICRO 101
+
+#define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
+ LIBAVCODEC_VERSION_MINOR, \
+ LIBAVCODEC_VERSION_MICRO)
+#define LIBAVCODEC_VERSION AV_VERSION(LIBAVCODEC_VERSION_MAJOR, \
+ LIBAVCODEC_VERSION_MINOR, \
+ LIBAVCODEC_VERSION_MICRO)
+#define LIBAVCODEC_BUILD LIBAVCODEC_VERSION_INT
+
+#define LIBAVCODEC_IDENT "Lavc" AV_STRINGIFY(LIBAVCODEC_VERSION)
+
+/**
+ * FF_API_* defines may be placed below to indicate public API that will be
+ * dropped at a future version bump. The defines themselves are not part of
+ * the public API and may change, break or disappear at any time.
+ *
+ * @note, when bumping the major version it is recommended to manually
+ * disable each FF_API_* in its own commit instead of disabling them all
+ * at once through the bump. This improves the git bisect-ability of the change.
+ */
+
+#ifndef FF_API_VIMA_DECODER
+#define FF_API_VIMA_DECODER (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_AUDIO_CONVERT
+#define FF_API_AUDIO_CONVERT (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_AVCODEC_RESAMPLE
+#define FF_API_AVCODEC_RESAMPLE FF_API_AUDIO_CONVERT
+#endif
+#ifndef FF_API_GETCHROMA
+#define FF_API_GETCHROMA (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_MISSING_SAMPLE
+#define FF_API_MISSING_SAMPLE (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_LOWRES
+#define FF_API_LOWRES (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_CAP_VDPAU
+#define FF_API_CAP_VDPAU (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_BUFS_VDPAU
+#define FF_API_BUFS_VDPAU (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_VOXWARE
+#define FF_API_VOXWARE (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_SET_DIMENSIONS
+#define FF_API_SET_DIMENSIONS (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_DEBUG_MV
+#define FF_API_DEBUG_MV (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_AC_VLC
+#define FF_API_AC_VLC (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_OLD_MSMPEG4
+#define FF_API_OLD_MSMPEG4 (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_ASPECT_EXTENDED
+#define FF_API_ASPECT_EXTENDED (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_ARCH_ALPHA
+#define FF_API_ARCH_ALPHA (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_XVMC
+#define FF_API_XVMC (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_ERROR_RATE
+#define FF_API_ERROR_RATE (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_QSCALE_TYPE
+#define FF_API_QSCALE_TYPE (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_MB_TYPE
+#define FF_API_MB_TYPE (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_MAX_BFRAMES
+#define FF_API_MAX_BFRAMES (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_NEG_LINESIZES
+#define FF_API_NEG_LINESIZES (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_EMU_EDGE
+#define FF_API_EMU_EDGE (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_ARCH_SH4
+#define FF_API_ARCH_SH4 (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_ARCH_SPARC
+#define FF_API_ARCH_SPARC (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_UNUSED_MEMBERS
+#define FF_API_UNUSED_MEMBERS (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_IDCT_XVIDMMX
+#define FF_API_IDCT_XVIDMMX (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_INPUT_PRESERVED
+#define FF_API_INPUT_PRESERVED (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_NORMALIZE_AQP
+#define FF_API_NORMALIZE_AQP (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_GMC
+#define FF_API_GMC (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_MV0
+#define FF_API_MV0 (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_CODEC_NAME
+#define FF_API_CODEC_NAME (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_AFD
+#define FF_API_AFD (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_VISMV
+/* XXX: don't forget to drop the -vismv documentation */
+#define FF_API_VISMV (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_AUDIOENC_DELAY
+#define FF_API_AUDIOENC_DELAY (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_VAAPI_CONTEXT
+#define FF_API_VAAPI_CONTEXT (LIBAVCODEC_VERSION_MAJOR < 58)
+#endif
+#ifndef FF_API_AVCTX_TIMEBASE
+#define FF_API_AVCTX_TIMEBASE (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_MPV_OPT
+#define FF_API_MPV_OPT (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_STREAM_CODEC_TAG
+#define FF_API_STREAM_CODEC_TAG (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_QUANT_BIAS
+#define FF_API_QUANT_BIAS (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_RC_STRATEGY
+#define FF_API_RC_STRATEGY (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_CODED_FRAME
+#define FF_API_CODED_FRAME (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_MOTION_EST
+#define FF_API_MOTION_EST (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_WITHOUT_PREFIX
+#define FF_API_WITHOUT_PREFIX (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_SIDEDATA_ONLY_PKT
+#define FF_API_SIDEDATA_ONLY_PKT (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_VDPAU_PROFILE
+#define FF_API_VDPAU_PROFILE (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_CONVERGENCE_DURATION
+#define FF_API_CONVERGENCE_DURATION (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_AVPICTURE
+#define FF_API_AVPICTURE (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_AVPACKET_OLD_API
+#define FF_API_AVPACKET_OLD_API (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_RTP_CALLBACK
+#define FF_API_RTP_CALLBACK (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_VBV_DELAY
+#define FF_API_VBV_DELAY (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_CODER_TYPE
+#define FF_API_CODER_TYPE (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_STAT_BITS
+#define FF_API_STAT_BITS (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_PRIVATE_OPT
+#define FF_API_PRIVATE_OPT (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_ASS_TIMING
+#define FF_API_ASS_TIMING (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_OLD_BSF
+#define FF_API_OLD_BSF (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_COPY_CONTEXT
+#define FF_API_COPY_CONTEXT (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_GET_CONTEXT_DEFAULTS
+#define FF_API_GET_CONTEXT_DEFAULTS (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+#ifndef FF_API_NVENC_OLD_NAME
+#define FF_API_NVENC_OLD_NAME (LIBAVCODEC_VERSION_MAJOR < 59)
+#endif
+
+#endif /* AVCODEC_VERSION_H */
diff --git a/media/ffvpx/libavcodec/videodsp.c b/media/ffvpx/libavcodec/videodsp.c
new file mode 100644
index 000000000..ba618a7bb
--- /dev/null
+++ b/media/ffvpx/libavcodec/videodsp.c
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2012 Ronald S. Bultje
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
+#include "libavutil/common.h"
+#include "videodsp.h"
+
+#define BIT_DEPTH 8
+#include "videodsp_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 16
+#include "videodsp_template.c"
+#undef BIT_DEPTH
+
+/* Default no-op prefetch used when no arch-specific implementation is
+ * installed; all parameters are intentionally unused. */
+static void just_return(uint8_t *buf, ptrdiff_t stride, int h)
+{
+}
+
+/**
+ * Initialize a VideoDSPContext: install the C fallbacks (no-op prefetch,
+ * edge-emulation for the requested bit depth), then let each enabled
+ * architecture override them with optimized versions.
+ *
+ * @param ctx context to fill in
+ * @param bpc bits per component; <= 8 selects the 8-bit edge-emulation
+ *            path, anything larger the 16-bit one
+ */
+av_cold void ff_videodsp_init(VideoDSPContext *ctx, int bpc)
+{
+ ctx->prefetch = just_return;
+ if (bpc <= 8) {
+ ctx->emulated_edge_mc = ff_emulated_edge_mc_8;
+ } else {
+ ctx->emulated_edge_mc = ff_emulated_edge_mc_16;
+ }
+
+ /* arch-specific init may replace any of the defaults set above */
+ if (ARCH_AARCH64)
+ ff_videodsp_init_aarch64(ctx, bpc);
+ if (ARCH_ARM)
+ ff_videodsp_init_arm(ctx, bpc);
+ if (ARCH_PPC)
+ ff_videodsp_init_ppc(ctx, bpc);
+ if (ARCH_X86)
+ ff_videodsp_init_x86(ctx, bpc);
+}
diff --git a/media/ffvpx/libavcodec/videodsp.h b/media/ffvpx/libavcodec/videodsp.h
new file mode 100644
index 000000000..fc01a31dc
--- /dev/null
+++ b/media/ffvpx/libavcodec/videodsp.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2012 Ronald S. Bultje
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Core video DSP helper functions
+ */
+
+#ifndef AVCODEC_VIDEODSP_H
+#define AVCODEC_VIDEODSP_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define EMULATED_EDGE(depth) \
+void ff_emulated_edge_mc_ ## depth(uint8_t *dst, const uint8_t *src, \
+ ptrdiff_t dst_stride, ptrdiff_t src_stride, \
+ int block_w, int block_h,\
+ int src_x, int src_y, int w, int h);
+
+EMULATED_EDGE(8)
+EMULATED_EDGE(16)
+
+typedef struct VideoDSPContext {
+ /**
+ * Copy a rectangular area of samples to a temporary buffer and replicate
+ * the border samples.
+ *
+ * @param dst destination buffer
+ * @param src source buffer
+ * @param dst_linesize number of bytes between 2 vertically adjacent
+ * samples in the destination buffer
+ * @param src_linesize number of bytes between 2 vertically adjacent
+ * samples in the source buffer
+ * @param block_w width of block
+ * @param block_h height of block
+ * @param src_x x coordinate of the top left sample of the block in the
+ * source buffer
+ * @param src_y y coordinate of the top left sample of the block in the
+ * source buffer
+ * @param w width of the source buffer
+ * @param h height of the source buffer
+ */
+ void (*emulated_edge_mc)(uint8_t *dst, const uint8_t *src,
+ ptrdiff_t dst_linesize,
+ ptrdiff_t src_linesize,
+ int block_w, int block_h,
+ int src_x, int src_y, int w, int h);
+
+ /**
+ * Prefetch memory into cache (if supported by hardware).
+ *
+ * @param buf pointer to buffer to prefetch memory from
+ * @param stride distance between two lines of buf (in bytes)
+ * @param h number of lines to prefetch
+ */
+ void (*prefetch)(uint8_t *buf, ptrdiff_t stride, int h);
+} VideoDSPContext;
+
+void ff_videodsp_init(VideoDSPContext *ctx, int bpc);
+
+/* for internal use only (i.e. called by ff_videodsp_init() */
+void ff_videodsp_init_aarch64(VideoDSPContext *ctx, int bpc);
+void ff_videodsp_init_arm(VideoDSPContext *ctx, int bpc);
+void ff_videodsp_init_ppc(VideoDSPContext *ctx, int bpc);
+void ff_videodsp_init_x86(VideoDSPContext *ctx, int bpc);
+
+#endif /* AVCODEC_VIDEODSP_H */
diff --git a/media/ffvpx/libavcodec/videodsp_template.c b/media/ffvpx/libavcodec/videodsp_template.c
new file mode 100644
index 000000000..94c1b7188
--- /dev/null
+++ b/media/ffvpx/libavcodec/videodsp_template.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2002-2012 Michael Niedermayer
+ * Copyright (C) 2012 Ronald S. Bultje
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "bit_depth_template.c"
+/**
+ * Copy a block_w x block_h block at (src_x, src_y) from a w x h source
+ * image into buf, replicating edge samples for any part of the block that
+ * falls outside the source. Instantiated per bit depth via
+ * bit_depth_template.c (the `pixel` type and FUNC() name suffix).
+ *
+ * Strides are in bytes; pixel-unit offsets are scaled by sizeof(pixel).
+ */
+void FUNC(ff_emulated_edge_mc)(uint8_t *buf, const uint8_t *src,
+ ptrdiff_t buf_linesize,
+ ptrdiff_t src_linesize,
+ int block_w, int block_h,
+ int src_x, int src_y, int w, int h)
+{
+ int x, y;
+ int start_y, start_x, end_y, end_x;
+
+ if (!w || !h)
+ return;
+
+ av_assert2(block_w * sizeof(pixel) <= FFABS(buf_linesize));
+
+ /* Clamp src_y/src_x so the block overlaps the source by at least one
+ * row/column; src is adjusted in lockstep so it still points at the
+ * sample corresponding to the clamped coordinates. */
+ if (src_y >= h) {
+ src -= src_y * src_linesize;
+ src += (h - 1) * src_linesize;
+ src_y = h - 1;
+ } else if (src_y <= -block_h) {
+ src -= src_y * src_linesize;
+ src += (1 - block_h) * src_linesize;
+ src_y = 1 - block_h;
+ }
+ if (src_x >= w) {
+ src += (w - 1 - src_x) * sizeof(pixel);
+ src_x = w - 1;
+ } else if (src_x <= -block_w) {
+ src += (1 - block_w - src_x) * sizeof(pixel);
+ src_x = 1 - block_w;
+ }
+
+ /* [start, end) is the part of the block covered by real source data */
+ start_y = FFMAX(0, -src_y);
+ start_x = FFMAX(0, -src_x);
+ end_y = FFMIN(block_h, h-src_y);
+ end_x = FFMIN(block_w, w-src_x);
+ av_assert2(start_y < end_y && block_h);
+ av_assert2(start_x < end_x && block_w);
+
+ w = end_x - start_x;
+ src += start_y * src_linesize + start_x * sizeof(pixel);
+ buf += start_x * sizeof(pixel);
+
+ // top: replicate the first available source row upwards
+ for (y = 0; y < start_y; y++) {
+ memcpy(buf, src, w * sizeof(pixel));
+ buf += buf_linesize;
+ }
+
+ // copy existing part
+ for (; y < end_y; y++) {
+ memcpy(buf, src, w * sizeof(pixel));
+ src += src_linesize;
+ buf += buf_linesize;
+ }
+
+ // bottom: replicate the last copied source row downwards
+ src -= src_linesize;
+ for (; y < block_h; y++) {
+ memcpy(buf, src, w * sizeof(pixel));
+ buf += buf_linesize;
+ }
+
+ /* second pass: extend each row horizontally into the left/right margins */
+ buf -= block_h * buf_linesize + start_x * sizeof(pixel);
+ while (block_h--) {
+ pixel *bufp = (pixel *) buf;
+
+ // left
+ for(x = 0; x < start_x; x++) {
+ bufp[x] = bufp[start_x];
+ }
+
+ // right
+ for (x = end_x; x < block_w; x++) {
+ bufp[x] = bufp[end_x - 1];
+ }
+ buf += buf_linesize;
+ }
+}
diff --git a/media/ffvpx/libavcodec/vlc.h b/media/ffvpx/libavcodec/vlc.h
new file mode 100644
index 000000000..9e38f8c6b
--- /dev/null
+++ b/media/ffvpx/libavcodec/vlc.h
@@ -0,0 +1,65 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VLC_H
+#define AVCODEC_VLC_H
+
+#include <stdint.h>
+
+/* Element type of the VLC lookup tables below. */
+#define VLC_TYPE int16_t
+
+/* Variable-length-code decoding table. */
+typedef struct VLC {
+ int bits; ///< number of bits used to index the table
+ VLC_TYPE (*table)[2]; ///< code, bits
+ int table_size, table_allocated;
+} VLC;
+
+/* Run-length VLC table entry (level/run pair plus code length). */
+typedef struct RL_VLC_ELEM {
+ int16_t level;
+ int8_t len;
+ uint8_t run;
+} RL_VLC_ELEM;
+
+/* Convenience wrapper around ff_init_vlc_sparse() for tables without a
+ * separate symbol array. */
+#define init_vlc(vlc, nb_bits, nb_codes, \
+ bits, bits_wrap, bits_size, \
+ codes, codes_wrap, codes_size, \
+ flags) \
+ ff_init_vlc_sparse(vlc, nb_bits, nb_codes, \
+ bits, bits_wrap, bits_size, \
+ codes, codes_wrap, codes_size, \
+ NULL, 0, 0, flags)
+
+int ff_init_vlc_sparse(VLC *vlc, int nb_bits, int nb_codes,
+ const void *bits, int bits_wrap, int bits_size,
+ const void *codes, int codes_wrap, int codes_size,
+ const void *symbols, int symbols_wrap, int symbols_size,
+ int flags);
+void ff_free_vlc(VLC *vlc);
+
+#define INIT_VLC_LE 2
+#define INIT_VLC_USE_NEW_STATIC 4
+
+/* Initialize a VLC backed by a function-local static table of
+ * static_size entries (no heap allocation). */
+#define INIT_VLC_STATIC(vlc, bits, a, b, c, d, e, f, g, static_size) \
+ do { \
+ static VLC_TYPE table[static_size][2]; \
+ (vlc)->table = table; \
+ (vlc)->table_allocated = static_size; \
+ init_vlc(vlc, bits, a, b, c, d, e, f, g, INIT_VLC_USE_NEW_STATIC); \
+ } while (0)
+
+#endif /* AVCODEC_VLC_H */
diff --git a/media/ffvpx/libavcodec/vorbis_parser.c b/media/ffvpx/libavcodec/vorbis_parser.c
new file mode 100644
index 000000000..0b2c97cde
--- /dev/null
+++ b/media/ffvpx/libavcodec/vorbis_parser.c
@@ -0,0 +1,341 @@
+/*
+ * Copyright (c) 2012 Justin Ruggles
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Vorbis audio parser
+ *
+ * Determines the duration for each packet.
+ */
+
+#include "libavutil/log.h"
+
+#include "get_bits.h"
+#include "parser.h"
+#include "xiph.h"
+#include "vorbis_parser_internal.h"
+
+/* AVClass installed as AVVorbisParseContext.class in vorbis_parse_init() so
+ * the context can be passed directly to av_log(). */
+static const AVClass vorbis_parser_class = {
+ .class_name = "Vorbis parser",
+ .item_name = av_default_item_name,
+ .version = LIBAVUTIL_VERSION_INT,
+};
+
+/**
+ * Validate the Vorbis identification header and extract the two window
+ * sizes into s->blocksize[].
+ *
+ * @param buf      identification header packet (type 1, "vorbis" signature)
+ * @param buf_size size of buf; must be at least the 30-byte header
+ * @return 0 on success, AVERROR_INVALIDDATA on a malformed header
+ */
+static int parse_id_header(AVVorbisParseContext *s,
+ const uint8_t *buf, int buf_size)
+{
+ /* Id header should be 30 bytes */
+ if (buf_size < 30) {
+ av_log(s, AV_LOG_ERROR, "Id header is too short\n");
+ return AVERROR_INVALIDDATA;
+ }
+
+ /* make sure this is the Id header */
+ if (buf[0] != 1) {
+ av_log(s, AV_LOG_ERROR, "Wrong packet type in Id header\n");
+ return AVERROR_INVALIDDATA;
+ }
+
+ /* check for header signature */
+ if (memcmp(&buf[1], "vorbis", 6)) {
+ av_log(s, AV_LOG_ERROR, "Invalid packet signature in Id header\n");
+ return AVERROR_INVALIDDATA;
+ }
+
+ if (!(buf[29] & 0x1)) {
+ av_log(s, AV_LOG_ERROR, "Invalid framing bit in Id header\n");
+ return AVERROR_INVALIDDATA;
+ }
+
+ /* byte 28 packs both blocksize exponents: low nibble = blocksize[0]
+ * (short window), high nibble = blocksize[1] (long window) */
+ s->blocksize[0] = 1 << (buf[28] & 0xF);
+ s->blocksize[1] = 1 << (buf[28] >> 4);
+
+ return 0;
+}
+
+/**
+ * Parse the Vorbis setup header backwards to recover the mode list, and fill
+ * in s->mode_count, s->mode_mask, s->prev_mask and s->mode_blocksize[].
+ *
+ * The modes are the last fields of the setup header, so the packet bytes are
+ * reversed and read back-to-front with get_bits(); this is heuristic (see
+ * the comments below) but avoids parsing all the variable-sized codebook
+ * fields that precede the modes.
+ *
+ * @param buf      setup header packet (type 5, "vorbis" signature)
+ * @param buf_size size of buf
+ * @return 0 on success, AVERROR_INVALIDDATA on a malformed header,
+ *         AVERROR(ENOMEM) if the reversal buffer cannot be allocated
+ */
+static int parse_setup_header(AVVorbisParseContext *s,
+ const uint8_t *buf, int buf_size)
+{
+ GetBitContext gb, gb0;
+ uint8_t *rev_buf;
+ int i, ret = 0;
+ int got_framing_bit, mode_count, got_mode_header, last_mode_count = 0;
+
+ /* avoid overread */
+ if (buf_size < 7) {
+ av_log(s, AV_LOG_ERROR, "Setup header is too short\n");
+ return AVERROR_INVALIDDATA;
+ }
+
+ /* make sure this is the Setup header */
+ if (buf[0] != 5) {
+ av_log(s, AV_LOG_ERROR, "Wrong packet type in Setup header\n");
+ return AVERROR_INVALIDDATA;
+ }
+
+ /* check for header signature */
+ if (memcmp(&buf[1], "vorbis", 6)) {
+ av_log(s, AV_LOG_ERROR, "Invalid packet signature in Setup header\n");
+ return AVERROR_INVALIDDATA;
+ }
+
+ /* reverse bytes so we can easily read backwards with get_bits() */
+ if (!(rev_buf = av_malloc(buf_size))) {
+ av_log(s, AV_LOG_ERROR, "Out of memory\n");
+ return AVERROR(ENOMEM);
+ }
+ for (i = 0; i < buf_size; i++)
+ rev_buf[i] = buf[buf_size - 1 - i];
+ init_get_bits(&gb, rev_buf, buf_size * 8);
+
+ /* scan (in reversed order) for the trailing framing bit; its bit
+ * position is remembered so the mode list can be re-read later */
+ got_framing_bit = 0;
+ while (get_bits_left(&gb) > 97) {
+ if (get_bits1(&gb)) {
+ got_framing_bit = get_bits_count(&gb);
+ break;
+ }
+ }
+ if (!got_framing_bit) {
+ av_log(s, AV_LOG_ERROR, "Invalid Setup header\n");
+ ret = AVERROR_INVALIDDATA;
+ goto bad_header;
+ }
+
+ /* Now we search backwards to find possible valid mode counts. This is not
+ * fool-proof because we could have false positive matches and read too
+ * far, but there isn't really any way to be sure without parsing through
+ * all the many variable-sized fields before the modes. This approach seems
+ * to work well in testing, and it is similar to how it is handled in
+ * liboggz. */
+ mode_count = 0;
+ got_mode_header = 0;
+ while (get_bits_left(&gb) >= 97) {
+ if (get_bits(&gb, 8) > 63 || get_bits(&gb, 16) || get_bits(&gb, 16))
+ break;
+ skip_bits(&gb, 1);
+ mode_count++;
+ if (mode_count > 64)
+ break;
+ gb0 = gb;
+ /* a valid list is preceded by a 6-bit (count - 1) field */
+ if (get_bits(&gb0, 6) + 1 == mode_count) {
+ got_mode_header = 1;
+ last_mode_count = mode_count;
+ }
+ }
+ if (!got_mode_header) {
+ av_log(s, AV_LOG_ERROR, "Invalid Setup header\n");
+ ret = AVERROR_INVALIDDATA;
+ goto bad_header;
+ }
+ /* All samples I've seen use <= 2 modes, so ask for a sample if we find
+ * more than that, as it is most likely a false positive. If we get any
+ * we may need to approach this the long way and parse the whole Setup
+ * header, but I hope very much that it never comes to that. */
+ if (last_mode_count > 2) {
+ avpriv_request_sample(s,
+ "%d modes (either a false positive or a "
+ "sample from an unknown encoder)",
+ last_mode_count);
+ }
+ /* We're limiting the mode count to 63 so that we know that the previous
+ * block flag will be in the first packet byte. */
+ if (last_mode_count > 63) {
+ av_log(s, AV_LOG_ERROR, "Unsupported mode count: %d\n",
+ last_mode_count);
+ ret = AVERROR_INVALIDDATA;
+ goto bad_header;
+ }
+ s->mode_count = mode_count = last_mode_count;
+ /* Determine the number of bits required to code the mode and turn that
+ * into a bitmask to directly access the mode from the first frame byte. */
+ s->mode_mask = ((1 << (av_log2(mode_count - 1) + 1)) - 1) << 1;
+ /* The previous window flag is the next bit after the mode */
+ s->prev_mask = (s->mode_mask | 0x1) + 1;
+
+ /* re-read the (reversed) mode list to collect each mode's blocksize
+ * flag; 40 bits = the other fixed-size fields of one mode entry */
+ init_get_bits(&gb, rev_buf, buf_size * 8);
+ skip_bits_long(&gb, got_framing_bit);
+ for (i = mode_count - 1; i >= 0; i--) {
+ skip_bits_long(&gb, 40);
+ s->mode_blocksize[i] = get_bits1(&gb);
+ }
+
+bad_header:
+ av_free(rev_buf);
+ return ret;
+}
+
+/**
+ * Initialize the parser context from codec extradata: split the three Xiph
+ * headers, then parse the identification and setup headers. On success,
+ * sets s->valid_extradata and seeds previous_blocksize from mode 0.
+ *
+ * @param extradata      Xiph-lacing extradata containing the three headers
+ * @param extradata_size size of extradata
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int vorbis_parse_init(AVVorbisParseContext *s,
+ const uint8_t *extradata, int extradata_size)
+{
+ const uint8_t *header_start[3];
+ int header_len[3];
+ int ret;
+
+ s->class = &vorbis_parser_class;
+ s->extradata_parsed = 1;
+
+ if ((ret = avpriv_split_xiph_headers(extradata,
+ extradata_size, 30,
+ header_start, header_len)) < 0) {
+ av_log(s, AV_LOG_ERROR, "Extradata corrupt.\n");
+ return ret;
+ }
+
+ if ((ret = parse_id_header(s, header_start[0], header_len[0])) < 0)
+ return ret;
+
+ /* header_start[1] is the comment header; it carries no timing info
+ * and is not parsed here */
+ if ((ret = parse_setup_header(s, header_start[2], header_len[2])) < 0)
+ return ret;
+
+ s->valid_extradata = 1;
+ s->previous_blocksize = s->blocksize[s->mode_blocksize[0]];
+
+ return 0;
+}
+
+/**
+ * Return the duration (in samples) of a Vorbis packet, and classify header
+ * packets via *flags.
+ *
+ * Audio packets contribute (previous + current blocksize) / 4 samples; the
+ * context's previous_blocksize is updated as a side effect. Header packets
+ * (odd first byte) have duration 0 and set a VORBIS_FLAG_* bit, or are
+ * rejected when flags is NULL. Without valid extradata, 0 is returned.
+ *
+ * @return duration >= 0, or AVERROR_INVALIDDATA for a bad packet
+ */
+int av_vorbis_parse_frame_flags(AVVorbisParseContext *s, const uint8_t *buf,
+ int buf_size, int *flags)
+{
+ int duration = 0;
+
+ if (s->valid_extradata && buf_size > 0) {
+ int mode, current_blocksize;
+ int previous_blocksize = s->previous_blocksize;
+
+ /* odd first byte => header packet, not audio */
+ if (buf[0] & 1) {
+ /* If the user doesn't care about special packets, it's a bad one. */
+ if (!flags)
+ goto bad_packet;
+
+ /* Set the flag for which kind of special packet it is. */
+ if (buf[0] == 1)
+ *flags |= VORBIS_FLAG_HEADER;
+ else if (buf[0] == 3)
+ *flags |= VORBIS_FLAG_COMMENT;
+ else if (buf[0] == 5)
+ *flags |= VORBIS_FLAG_SETUP;
+ else
+ goto bad_packet;
+
+ /* Special packets have no duration. */
+ return 0;
+
+bad_packet:
+ av_log(s, AV_LOG_ERROR, "Invalid packet\n");
+ return AVERROR_INVALIDDATA;
+ }
+ /* extract the mode number from the first byte (mask built in
+ * parse_setup_header) */
+ if (s->mode_count == 1)
+ mode = 0;
+ else
+ mode = (buf[0] & s->mode_mask) >> 1;
+ if (mode >= s->mode_count) {
+ av_log(s, AV_LOG_ERROR, "Invalid mode in packet\n");
+ return AVERROR_INVALIDDATA;
+ }
+ /* long-window modes carry an explicit previous-window flag which
+ * overrides the blocksize remembered from the last packet */
+ if(s->mode_blocksize[mode]){
+ int flag = !!(buf[0] & s->prev_mask);
+ previous_blocksize = s->blocksize[flag];
+ }
+ current_blocksize = s->blocksize[s->mode_blocksize[mode]];
+ /* overlap-add yields a quarter of each of the two window sizes */
+ duration = (previous_blocksize + current_blocksize) >> 2;
+ s->previous_blocksize = current_blocksize;
+ }
+
+ return duration;
+}
+
+/**
+ * Convenience wrapper around av_vorbis_parse_frame_flags() with flags=NULL,
+ * i.e. header packets are treated as invalid.
+ */
+int av_vorbis_parse_frame(AVVorbisParseContext *s, const uint8_t *buf,
+ int buf_size)
+{
+ return av_vorbis_parse_frame_flags(s, buf, buf_size, NULL);
+}
+
+/**
+ * Reset the parser after a seek/discontinuity: restore previous_blocksize
+ * to the short window size. A no-op when no valid extradata was parsed.
+ */
+void av_vorbis_parse_reset(AVVorbisParseContext *s)
+{
+ if (s->valid_extradata)
+ s->previous_blocksize = s->blocksize[0];
+}
+
+/**
+ * Free the parser context and set *s to NULL. Safe to call with *s == NULL.
+ */
+void av_vorbis_parse_free(AVVorbisParseContext **s)
+{
+ av_freep(s);
+}
+
+/**
+ * Allocate a parser context and initialize it from the codec extradata.
+ *
+ * @return the new context, or NULL on allocation failure or if the
+ *         extradata headers cannot be parsed (the context is freed on the
+ *         error path; no partial context is leaked)
+ */
+AVVorbisParseContext *av_vorbis_parse_init(const uint8_t *extradata,
+ int extradata_size)
+{
+ AVVorbisParseContext *s = av_mallocz(sizeof(*s));
+ int ret;
+
+ if (!s)
+ return NULL;
+
+ ret = vorbis_parse_init(s, extradata, extradata_size);
+ if (ret < 0) {
+ av_vorbis_parse_free(&s);
+ return NULL;
+ }
+
+ return s;
+}
+
+#if CONFIG_VORBIS_PARSER
+
+/* Private data of the AVCodecParser wrapper: lazily-created public parser. */
+typedef struct VorbisParseContext {
+ AVVorbisParseContext *vp; ///< NULL until extradata becomes available
+} VorbisParseContext;
+
+/**
+ * AVCodecParser callback: lazily create the public parser from the codec
+ * extradata and set s1->duration for each packet. The packet itself is
+ * always passed through unchanged.
+ *
+ * @return number of bytes consumed (always buf_size)
+ */
+static int vorbis_parse(AVCodecParserContext *s1, AVCodecContext *avctx,
+ const uint8_t **poutbuf, int *poutbuf_size,
+ const uint8_t *buf, int buf_size)
+{
+ VorbisParseContext *s = s1->priv_data;
+ int duration;
+
+ /* extradata may not be available at parser-open time, so initialize
+ * on the first packet that has it */
+ if (!s->vp && avctx->extradata && avctx->extradata_size) {
+ s->vp = av_vorbis_parse_init(avctx->extradata, avctx->extradata_size);
+ }
+ if (!s->vp)
+ goto end;
+
+ /* negative return = invalid packet; leave s1->duration untouched */
+ if ((duration = av_vorbis_parse_frame(s->vp, buf, buf_size)) >= 0)
+ s1->duration = duration;
+
+end:
+ /* always return the full packet. this parser isn't doing any splitting or
+ combining, only packet analysis */
+ *poutbuf = buf;
+ *poutbuf_size = buf_size;
+ return buf_size;
+}
+
+/* AVCodecParser close callback: release the lazily-created public parser. */
+static void vorbis_parser_close(AVCodecParserContext *ctx)
+{
+ VorbisParseContext *s = ctx->priv_data;
+ av_vorbis_parse_free(&s->vp);
+}
+
+/* Parser registration entry for AV_CODEC_ID_VORBIS. */
+AVCodecParser ff_vorbis_parser = {
+ .codec_ids = { AV_CODEC_ID_VORBIS },
+ .priv_data_size = sizeof(VorbisParseContext),
+ .parser_parse = vorbis_parse,
+ .parser_close = vorbis_parser_close,
+};
diff --git a/media/ffvpx/libavcodec/vorbis_parser.h b/media/ffvpx/libavcodec/vorbis_parser.h
new file mode 100644
index 000000000..92050277e
--- /dev/null
+++ b/media/ffvpx/libavcodec/vorbis_parser.h
@@ -0,0 +1,77 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * A public API for Vorbis parsing
+ *
+ * Determines the duration for each packet.
+ */
+
+#ifndef AVCODEC_VORBIS_PARSER_H
+#define AVCODEC_VORBIS_PARSER_H
+
+#include <stdint.h>
+
+typedef struct AVVorbisParseContext AVVorbisParseContext;
+
+/**
+ * Allocate and initialize the Vorbis parser using headers in the extradata.
+ *
+ * @param extradata codec extradata containing the three Xiph headers
+ * @param extradata_size size of the extradata buffer
+ * @return newly allocated parser context, or NULL on failure
+ */
+AVVorbisParseContext *av_vorbis_parse_init(const uint8_t *extradata,
+ int extradata_size);
+
+/**
+ * Free the parser and everything associated with it.
+ */
+void av_vorbis_parse_free(AVVorbisParseContext **s);
+
+#define VORBIS_FLAG_HEADER 0x00000001
+#define VORBIS_FLAG_COMMENT 0x00000002
+#define VORBIS_FLAG_SETUP 0x00000004
+
+/**
+ * Get the duration for a Vorbis packet.
+ *
+ * If @p flags is @c NULL,
+ * special frames are considered invalid.
+ *
+ * @param s Vorbis parser context
+ * @param buf buffer containing a Vorbis frame
+ * @param buf_size size of the buffer
+ * @param flags flags for special frames
+ */
+int av_vorbis_parse_frame_flags(AVVorbisParseContext *s, const uint8_t *buf,
+ int buf_size, int *flags);
+
+/**
+ * Get the duration for a Vorbis packet.
+ *
+ * @param s Vorbis parser context
+ * @param buf buffer containing a Vorbis frame
+ * @param buf_size size of the buffer
+ */
+int av_vorbis_parse_frame(AVVorbisParseContext *s, const uint8_t *buf,
+ int buf_size);
+
+void av_vorbis_parse_reset(AVVorbisParseContext *s);
+
+#endif /* AVCODEC_VORBIS_PARSER_H */
diff --git a/media/ffvpx/libavcodec/vorbis_parser_internal.h b/media/ffvpx/libavcodec/vorbis_parser_internal.h
new file mode 100644
index 000000000..691a84238
--- /dev/null
+++ b/media/ffvpx/libavcodec/vorbis_parser_internal.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2012 Justin Ruggles
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Vorbis audio parser
+ *
+ * Determines the duration for each packet.
+ */
+
+#ifndef AVCODEC_VORBIS_PARSER_INTERNAL_H
+#define AVCODEC_VORBIS_PARSER_INTERNAL_H
+
+#include "avcodec.h"
+#include "vorbis_parser.h"
+
+/* Internal layout of the public opaque AVVorbisParseContext handle; filled
+ * in by vorbis_parse_init() / parse_id_header() / parse_setup_header(). */
+struct AVVorbisParseContext {
+ const AVClass *class; ///< class for logging
+ int extradata_parsed; ///< we have attempted to parse extradata
+ int valid_extradata; ///< extradata is valid, so we can calculate duration
+ int blocksize[2]; ///< short and long window sizes
+ int previous_blocksize; ///< previous window size
+ int mode_blocksize[64]; ///< window size mapping for each mode
+ int mode_count; ///< number of modes
+ int mode_mask; ///< bitmask used to get the mode in each packet
+ int prev_mask; ///< bitmask used to get the previous mode flag in each packet
+};
+
+#endif /* AVCODEC_VORBIS_PARSER_INTERNAL_H */
diff --git a/media/ffvpx/libavcodec/vp3dsp.h b/media/ffvpx/libavcodec/vp3dsp.h
new file mode 100644
index 000000000..b95adae79
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp3dsp.h
@@ -0,0 +1,53 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VP3DSP_H
+#define AVCODEC_VP3DSP_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef struct VP3DSPContext { /* function-pointer table; entries may be replaced by arch-optimized versions via the init functions below */
+    /**
+     * Copy 8xH pixels from source to destination buffer using a bilinear
+     * filter with no rounding (i.e. *dst = (*a + *b) >> 1).
+     *
+     * @param dst destination buffer, aligned by 8
+     * @param a first source buffer, no alignment
+     * @param b second source buffer, no alignment
+     * @param stride distance between two lines in source/dest buffers
+     * @param h height
+     */
+    void (*put_no_rnd_pixels_l2)(uint8_t *dst,
+                                 const uint8_t *a,
+                                 const uint8_t *b,
+                                 ptrdiff_t stride, int h);
+
+    void (*idct_put)(uint8_t *dest, int line_size, int16_t *block); /* per FFmpeg DSP naming: "put" stores the IDCT result */
+    void (*idct_add)(uint8_t *dest, int line_size, int16_t *block); /* "add" accumulates the IDCT result onto dest */
+    void (*idct_dc_add)(uint8_t *dest, int line_size, int16_t *block); /* DC-only variant, per name */
+    void (*v_loop_filter)(uint8_t *src, int stride, int *bounding_values);
+    void (*h_loop_filter)(uint8_t *src, int stride, int *bounding_values);
+} VP3DSPContext;
+
+void ff_vp3dsp_init(VP3DSPContext *c, int flags); /* installs C defaults, then arch overrides */
+void ff_vp3dsp_init_arm(VP3DSPContext *c, int flags);
+void ff_vp3dsp_init_ppc(VP3DSPContext *c, int flags);
+void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags);
+
+#endif /* AVCODEC_VP3DSP_H */
diff --git a/media/ffvpx/libavcodec/vp56.h b/media/ffvpx/libavcodec/vp56.h
new file mode 100644
index 000000000..56c30919b
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp56.h
@@ -0,0 +1,400 @@
+/*
+ * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * VP5 and VP6 compatible video decoder (common features)
+ */
+
+#ifndef AVCODEC_VP56_H
+#define AVCODEC_VP56_H
+
+#include "get_bits.h"
+#include "hpeldsp.h"
+#include "bytestream.h"
+#include "h264chroma.h"
+#include "videodsp.h"
+#include "vp3dsp.h"
+#include "vp56dsp.h"
+
+typedef struct vp56_context VP56Context; /* forward declaration; full definition below */
+
+typedef enum {
+    VP56_FRAME_NONE =-1,
+    VP56_FRAME_CURRENT = 0,
+    VP56_FRAME_PREVIOUS = 1,
+    VP56_FRAME_GOLDEN = 2,
+    VP56_FRAME_GOLDEN2 = 3,
+} VP56Frame;
+
+typedef enum {
+    VP56_MB_INTER_NOVEC_PF = 0, /**< Inter MB, no vector, from previous frame */
+    VP56_MB_INTRA = 1, /**< Intra MB */
+    VP56_MB_INTER_DELTA_PF = 2, /**< Inter MB, above/left vector + delta, from previous frame */
+    VP56_MB_INTER_V1_PF = 3, /**< Inter MB, first vector, from previous frame */
+    VP56_MB_INTER_V2_PF = 4, /**< Inter MB, second vector, from previous frame */
+    VP56_MB_INTER_NOVEC_GF = 5, /**< Inter MB, no vector, from golden frame */
+    VP56_MB_INTER_DELTA_GF = 6, /**< Inter MB, above/left vector + delta, from golden frame */
+    VP56_MB_INTER_4V = 7, /**< Inter MB, 4 vectors, from previous frame */
+    VP56_MB_INTER_V1_GF = 8, /**< Inter MB, first vector, from golden frame */
+    VP56_MB_INTER_V2_GF = 9, /**< Inter MB, second vector, from golden frame */
+} VP56mb;
+
+typedef struct VP56Tree {
+    int8_t val; /* > 0: offset to the next node; <= 0: -val is the decoded leaf value (see vp56_rac_get_tree) */
+    int8_t prob_idx; /* index into the probability array passed alongside the tree */
+} VP56Tree;
+
+typedef struct VP56mv {
+    DECLARE_ALIGNED(4, int16_t, x); /* aligned(4) so the {x,y} pair sits on a 32-bit boundary */
+    int16_t y;
+} VP56mv;
+
+#define VP56_SIZE_CHANGE 1 /* NOTE(review): presumably flags a mid-stream dimension change; confirm against vp56.c */
+
+/* codec-specific hooks; instances are stored in struct vp56_context below */
+typedef void (*VP56ParseVectorAdjustment)(VP56Context *s,
+                                          VP56mv *vect);
+typedef void (*VP56Filter)(VP56Context *s, uint8_t *dst, uint8_t *src,
+                           int offset1, int offset2, int stride,
+                           VP56mv mv, int mask, int select, int luma);
+typedef void (*VP56ParseCoeff)(VP56Context *s);
+typedef void (*VP56DefaultModelsInit)(VP56Context *s);
+typedef void (*VP56ParseVectorModels)(VP56Context *s);
+typedef int (*VP56ParseCoeffModels)(VP56Context *s);
+typedef int (*VP56ParseHeader)(VP56Context *s, const uint8_t *buf,
+                               int buf_size);
+
+typedef struct VP56RangeCoder {
+    int high; /* current range size, renormalized into [128,255] by vp56_rac_renorm() */
+    int bits; /* stored negated (i.e. negative "bits" is a positive number of
+                 bits left) in order to eliminate a negate in cache refilling */
+    const uint8_t *buffer;
+    const uint8_t *end;
+    unsigned int code_word; /* bitstream window; its top 16 bits are compared against high << 16 */
+} VP56RangeCoder;
+
+typedef struct VP56RefDc {
+    uint8_t not_null_dc;
+    VP56Frame ref_frame;
+    int16_t dc_coeff;
+} VP56RefDc;
+
+typedef struct VP56Macroblock {
+    uint8_t type; /* a VP56mb value */
+    VP56mv mv;
+} VP56Macroblock;
+
+typedef struct VP56Model {
+    uint8_t coeff_reorder[64]; /* used in vp6 only */
+    uint8_t coeff_index_to_pos[64]; /* used in vp6 only */
+    uint8_t vector_sig[2]; /* delta sign */
+    uint8_t vector_dct[2]; /* delta coding types */
+    uint8_t vector_pdi[2][2]; /* predefined delta init */
+    uint8_t vector_pdv[2][7]; /* predefined delta values */
+    uint8_t vector_fdv[2][8]; /* 8 bit delta value definition */
+    uint8_t coeff_dccv[2][11]; /* DC coeff value */
+    uint8_t coeff_ract[2][3][6][11]; /* Run/AC coding type and AC coeff value */
+    uint8_t coeff_acct[2][3][3][6][5];/* vp5 only AC coding type for coding group < 3 */
+    uint8_t coeff_dcct[2][36][5]; /* DC coeff coding type */
+    uint8_t coeff_runv[2][14]; /* run value (vp6 only) */
+    uint8_t mb_type[3][10][10]; /* model for decoding MB type */
+    uint8_t mb_types_stats[3][10][2];/* contextual, next MB type stats */
+} VP56Model;
+
+struct vp56_context { /* shared decoder state for the VP5/VP6 family */
+    AVCodecContext *avctx;
+    H264ChromaContext h264chroma;
+    HpelDSPContext hdsp;
+    VideoDSPContext vdsp;
+    VP3DSPContext vp3dsp;
+    VP56DSPContext vp56dsp;
+    uint8_t idct_scantable[64];
+    AVFrame *frames[4]; /* one slot per VP56Frame index 0..3 (CURRENT/PREVIOUS/GOLDEN/GOLDEN2) */
+    uint8_t *edge_emu_buffer_alloc; /* allocation base; edge_emu_buffer points into it */
+    uint8_t *edge_emu_buffer;
+    VP56RangeCoder c;
+    VP56RangeCoder cc;
+    VP56RangeCoder *ccp; /* active coder — presumably &c or &cc; confirm in vp56.c */
+    int sub_version;
+
+    /* frame info */
+    int golden_frame;
+    int plane_width[4];
+    int plane_height[4];
+    int mb_width; /* number of horizontal MB */
+    int mb_height; /* number of vertical MB */
+    int block_offset[6];
+
+    int quantizer;
+    uint16_t dequant_dc;
+    uint16_t dequant_ac;
+
+    /* DC predictors management */
+    VP56RefDc *above_blocks;
+    VP56RefDc left_block[4];
+    int above_block_idx[6];
+    int16_t prev_dc[3][3]; /* [plan][ref_frame] */
+
+    /* blocks / macroblock */
+    VP56mb mb_type;
+    VP56Macroblock *macroblocks;
+    DECLARE_ALIGNED(16, int16_t, block_coeff)[6][64];
+
+    /* motion vectors */
+    VP56mv mv[6]; /* vectors for each block in MB */
+    VP56mv vector_candidate[2];
+    int vector_candidate_pos;
+
+    /* filtering hints */
+    int filter_header; /* used in vp6 only */
+    int deblock_filtering;
+    int filter_selection;
+    int filter_mode;
+    int max_vector_length;
+    int sample_variance_threshold;
+
+    uint8_t coeff_ctx[4][64]; /* used in vp5 only */
+    uint8_t coeff_ctx_last[4]; /* used in vp5 only */
+
+    int has_alpha;
+
+    /* upside-down flipping hints */
+    int flip; /* are we flipping ? */
+    int frbi; /* first row block index in MB */
+    int srbi; /* second row block index in MB */
+    int stride[4]; /* stride for each plan */
+
+    const uint8_t *vp56_coord_div;
+    VP56ParseVectorAdjustment parse_vector_adjustment; /* codec-specific hooks, set by the VP5/VP6 init code */
+    VP56Filter filter;
+    VP56ParseCoeff parse_coeff;
+    VP56DefaultModelsInit default_models_init;
+    VP56ParseVectorModels parse_vector_models;
+    VP56ParseCoeffModels parse_coeff_models;
+    VP56ParseHeader parse_header;
+
+    /* for "slice" parallelism between YUV and A */
+    VP56Context *alpha_context;
+
+    VP56Model *modelp; /* active model pointer */
+    VP56Model model;
+
+    /* huffman decoding */
+    int use_huffman;
+    GetBitContext gb;
+    VLC dccv_vlc[2];
+    VLC runv_vlc[2];
+    VLC ract_vlc[2][3][6];
+    unsigned int nb_null[2][2]; /* number of consecutive NULL DC/AC */
+};
+
+
+int ff_vp56_init(AVCodecContext *avctx, int flip, int has_alpha);
+int ff_vp56_init_context(AVCodecContext *avctx, VP56Context *s,
+                         int flip, int has_alpha);
+int ff_vp56_free(AVCodecContext *avctx);
+int ff_vp56_free_context(VP56Context *s);
+void ff_vp56_init_dequant(VP56Context *s, int quantizer);
+int ff_vp56_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
+                         AVPacket *avpkt);
+
+
+/**
+ * vp56 specific range coder implementation
+ */
+
+extern const uint8_t ff_vp56_norm_shift[256]; /* renormalization shift LUT, defined in vp56rac.c */
+void ff_vp56_init_range_decoder(VP56RangeCoder *c, const uint8_t *buf, int buf_size); /* preloads 24 bits from buf */
+
+static av_always_inline unsigned int vp56_rac_renorm(VP56RangeCoder *c) /* scale high back into [128,255], refill the bit window; caller stores the returned code_word */
+{
+    int shift = ff_vp56_norm_shift[c->high]; /* LUT: left shifts needed so high lands in [128,255] */
+    int bits = c->bits;
+    unsigned int code_word = c->code_word;
+
+    c->high <<= shift;
+    code_word <<= shift;
+    bits += shift;
+    if(bits >= 0 && c->buffer < c->end) { /* "bits" is stored negated; >= 0 means a fresh 16-bit load fits */
+        code_word |= bytestream_get_be16(&c->buffer) << bits;
+        bits -= 16;
+    }
+    c->bits = bits;
+    return code_word;
+}
+
+#if ARCH_ARM
+#include "arm/vp56_arith.h"
+#elif ARCH_X86
+#include "x86/vp56_arith.h"
+#endif
+
+#ifndef vp56_rac_get_prob /* the arch headers included above may provide an optimized replacement */
+#define vp56_rac_get_prob vp56_rac_get_prob
+static av_always_inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob) /* decode one bit; 0 is taken with probability ~prob/256 */
+{
+    unsigned int code_word = vp56_rac_renorm(c);
+    unsigned int low = 1 + (((c->high - 1) * prob) >> 8); /* split point: ~prob/256 of the range, never 0 */
+    unsigned int low_shift = low << 16;
+    int bit = code_word >= low_shift;
+
+    c->high = bit ? c->high - low : low;
+    c->code_word = bit ? code_word - low_shift : code_word;
+
+    return bit;
+}
+#endif
+
+#ifndef vp56_rac_get_prob_branchy /* arch headers may provide an optimized replacement */
+// branchy variant, to be used where there's a branch based on the bit decoded
+static av_always_inline int vp56_rac_get_prob_branchy(VP56RangeCoder *c, int prob) /* same math as vp56_rac_get_prob, with an explicit branch */
+{
+    unsigned long code_word = vp56_rac_renorm(c);
+    unsigned low = 1 + (((c->high - 1) * prob) >> 8); /* split point: ~prob/256 of the range */
+    unsigned low_shift = low << 16;
+
+    if (code_word >= low_shift) {
+        c->high -= low;
+        c->code_word = code_word - low_shift;
+        return 1;
+    }
+
+    c->high = low;
+    c->code_word = code_word;
+    return 0;
+}
+#endif
+
+static av_always_inline int vp56_rac_get(VP56RangeCoder *c) /* decode one equiprobable bit */
+{
+    unsigned int code_word = vp56_rac_renorm(c);
+    /* equiprobable */
+    int low = (c->high + 1) >> 1; /* half the range, rounded up */
+    unsigned int low_shift = low << 16;
+    int bit = code_word >= low_shift;
+    if (bit) {
+        c->high -= low;
+        code_word -= low_shift;
+    } else {
+        c->high = low;
+    }
+
+    c->code_word = code_word;
+    return bit;
+}
+
+// rounding is different than vp56_rac_get, is vp56_rac_get wrong?
+static av_always_inline int vp8_rac_get(VP56RangeCoder *c) /* equiprobable bit via prob = 128 */
+{
+    return vp56_rac_get_prob(c, 128);
+}
+
+static int vp56_rac_gets(VP56RangeCoder *c, int bits) /* read "bits" equiprobable bits, MSB first */
+{
+    int value = 0;
+
+    while (bits--) {
+        value = (value << 1) | vp56_rac_get(c);
+    }
+
+    return value;
+}
+
+static int vp8_rac_get_uint(VP56RangeCoder *c, int bits) /* read "bits" bits MSB first, using the VP8 equiprobable read */
+{
+    int value = 0;
+
+    while (bits--) {
+        value = (value << 1) | vp8_rac_get(c);
+    }
+
+    return value;
+}
+
+// fixme: add 1 bit to all the calls to this?
+static av_unused int vp8_rac_get_sint(VP56RangeCoder *c, int bits) /* flag + magnitude + sign: 0, or +/- an unsigned "bits"-bit value */
+{
+    int v;
+
+    if (!vp8_rac_get(c)) /* presence flag: 0 means the value is absent */
+        return 0;
+
+    v = vp8_rac_get_uint(c, bits);
+
+    if (vp8_rac_get(c)) /* sign bit: 1 means negative */
+        v = -v;
+
+    return v;
+}
+
+// P(7)
+static av_unused int vp56_rac_gets_nn(VP56RangeCoder *c, int bits) /* NOTE(review): "bits" is ignored — always reads 7 bits (see P(7) above) */
+{
+    int v = vp56_rac_gets(c, 7) << 1;
+    return v + !v; /* v is even, so this only maps 0 -> 1: result is never 0 ("nn" = non-null, per name) */
+}
+
+static av_unused int vp8_rac_get_nn(VP56RangeCoder *c) /* 7-bit value doubled, forced nonzero */
+{
+    int v = vp8_rac_get_uint(c, 7) << 1;
+    return v + !v; /* v is even, so this only maps 0 -> 1: result is never 0 */
+}
+
+static av_always_inline
+int vp56_rac_get_tree(VP56RangeCoder *c,
+                      const VP56Tree *tree,
+                      const uint8_t *probs) /* walk a VP56Tree: val > 0 is a branch offset, val <= 0 encodes the leaf as -val */
+{
+    while (tree->val > 0) {
+        if (vp56_rac_get_prob_branchy(c, probs[tree->prob_idx]))
+            tree += tree->val; /* bit 1: jump forward by val nodes */
+        else
+            tree++; /* bit 0: next node */
+    }
+    return -tree->val;
+}
+
+// how probabilities are associated with decisions is different I think
+// well, the new scheme fits in the old but this way has one fewer branches per decision
+static av_always_inline int vp8_rac_get_tree(VP56RangeCoder *c, const int8_t (*tree)[2],
+                                             const uint8_t *probs) /* VP8-style tree: tree[i][bit] > 0 is the next node, <= 0 encodes the leaf as -i */
+{
+    int i = 0;
+
+    do {
+        i = tree[i][vp56_rac_get_prob(c, probs[i])]; /* decoded bit selects the left/right child */
+    } while (i > 0);
+
+    return -i;
+}
+
+// DCTextra
+static av_always_inline int vp8_rac_get_coeff(VP56RangeCoder *c, const uint8_t *prob) /* read magnitude bits MSB-first until a 0 terminator in prob[] */
+{
+    int v = 0;
+
+    do {
+        v = (v<<1) + vp56_rac_get_prob(c, *prob++);
+    } while (*prob); /* probability array is zero-terminated */
+
+    return v;
+}
+
+#endif /* AVCODEC_VP56_H */
diff --git a/media/ffvpx/libavcodec/vp56dsp.h b/media/ffvpx/libavcodec/vp56dsp.h
new file mode 100644
index 000000000..7807baa4b
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp56dsp.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VP56DSP_H
+#define AVCODEC_VP56DSP_H
+
+#include <stdint.h>
+#include "avcodec.h"
+
+typedef struct VP56DSPContext { /* function-pointer table; arch inits below may override the C versions */
+    void (*edge_filter_hor)(uint8_t *yuv, int stride, int t);
+    void (*edge_filter_ver)(uint8_t *yuv, int stride, int t);
+
+    void (*vp6_filter_diag4)(uint8_t *dst, uint8_t *src, int stride,
+                             const int16_t *h_weights,const int16_t *v_weights);
+} VP56DSPContext;
+
+void ff_vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, int stride,
+                           const int16_t *h_weights, const int16_t *v_weights); /* C reference implementation */
+
+void ff_vp56dsp_init(VP56DSPContext *s, enum AVCodecID codec);
+void ff_vp6dsp_init_arm(VP56DSPContext *s, enum AVCodecID codec);
+void ff_vp6dsp_init_x86(VP56DSPContext* c, enum AVCodecID codec);
+
+#endif /* AVCODEC_VP56DSP_H */
diff --git a/media/ffvpx/libavcodec/vp56rac.c b/media/ffvpx/libavcodec/vp56rac.c
new file mode 100644
index 000000000..6061b7ee7
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp56rac.c
@@ -0,0 +1,47 @@
+/*
+ * VP5/6/8 decoder
+ * Copyright (c) 2010 Fiona Glaser <fiona@x264.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/common.h"
+#include "vp56.h"
+
+const uint8_t ff_vp56_norm_shift[256]= { /* norm_shift[i] = left shifts to bring i into [128,255]; used by vp56_rac_renorm() */
+    8,7,6,6,5,5,5,5,4,4,4,4,4,4,4,4,
+    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+};
+
+void ff_vp56_init_range_decoder(VP56RangeCoder *c, const uint8_t *buf, int buf_size) /* NOTE(review): reads 3 bytes unconditionally — assumes buf_size >= 3 */
+{
+    c->high = 255; /* full range */
+    c->bits = -16; /* stored negated, per the VP56RangeCoder convention */
+    c->buffer = buf;
+    c->end = buf + buf_size;
+    c->code_word = bytestream_get_be24(&c->buffer); /* preload 24 bits, advancing c->buffer */
+}
diff --git a/media/ffvpx/libavcodec/vp8.c b/media/ffvpx/libavcodec/vp8.c
new file mode 100644
index 000000000..c1c3eb707
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp8.c
@@ -0,0 +1,2858 @@
+/*
+ * VP7/VP8 compatible video decoder
+ *
+ * Copyright (C) 2010 David Conrad
+ * Copyright (C) 2010 Ronald S. Bultje
+ * Copyright (C) 2010 Fiona Glaser
+ * Copyright (C) 2012 Daniel Kang
+ * Copyright (C) 2014 Peter Ross
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/imgutils.h"
+
+#include "avcodec.h"
+#include "internal.h"
+#include "mathops.h"
+#include "rectangle.h"
+#include "thread.h"
+#include "vp8.h"
+#include "vp8data.h"
+
+#if ARCH_ARM
+# include "arm/vp8.h"
+#endif
+
+#if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
+#define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f)
+#elif CONFIG_VP7_DECODER
+#define VPX(vp7, f) vp7_ ## f
+#else // CONFIG_VP8_DECODER
+#define VPX(vp7, f) vp8_ ## f
+#endif
+
+static void free_buffers(VP8Context *s) /* free all per-context heap buffers; safe on partial init (av_freep tolerates NULL) */
+{
+    int i;
+    if (s->thread_data)
+        for (i = 0; i < MAX_THREADS; i++) {
+#if HAVE_THREADS
+            pthread_cond_destroy(&s->thread_data[i].cond);
+            pthread_mutex_destroy(&s->thread_data[i].lock);
+#endif
+            av_freep(&s->thread_data[i].filter_strength);
+        }
+    av_freep(&s->thread_data);
+    av_freep(&s->macroblocks_base);
+    av_freep(&s->intra4x4_pred_mode_top);
+    av_freep(&s->top_nnz);
+    av_freep(&s->top_border);
+
+    s->macroblocks = NULL; /* derived pointer into macroblocks_base (see update_dimensions) */
+}
+
+static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref) /* get a frame buffer plus a zeroed per-MB segment map; 0 or AVERROR */
+{
+    int ret;
+    if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
+                                    ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
+        return ret;
+    if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) { /* one byte per macroblock */
+        ff_thread_release_buffer(s->avctx, &f->tf); /* don't leak the frame buffer on OOM */
+        return AVERROR(ENOMEM);
+    }
+    return 0;
+}
+
+static void vp8_release_frame(VP8Context *s, VP8Frame *f) /* drop both the segment map and the underlying frame buffer */
+{
+    av_buffer_unref(&f->seg_map);
+    ff_thread_release_buffer(s->avctx, &f->tf);
+}
+
+#if CONFIG_VP8_DECODER
+static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src) /* make dst a new reference to src (frame + seg_map); 0 or AVERROR */
+{
+    int ret;
+
+    vp8_release_frame(s, dst); /* drop whatever dst currently holds */
+
+    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
+        return ret;
+    if (src->seg_map &&
+        !(dst->seg_map = av_buffer_ref(src->seg_map))) {
+        vp8_release_frame(s, dst); /* undo the frame ref on OOM */
+        return AVERROR(ENOMEM);
+    }
+
+    return 0;
+}
+#endif /* CONFIG_VP8_DECODER */
+
+static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem) /* release all frames and clear the reference map; optionally free MB buffers too */
+{
+    VP8Context *s = avctx->priv_data;
+    int i;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
+        vp8_release_frame(s, &s->frames[i]);
+    memset(s->framep, 0, sizeof(s->framep));
+
+    if (free_mem) /* full teardown (used on size change and close) */
+        free_buffers(s);
+}
+
+static void vp8_decode_flush(AVCodecContext *avctx) /* codec flush callback: release frames but keep allocations */
+{
+    vp8_decode_flush_impl(avctx, 0);
+}
+
+static VP8Frame *vp8_find_free_buffer(VP8Context *s) /* pick a frame slot not referenced by framep[]; 5 slots vs 4 references, so one is always free */
+{
+    VP8Frame *frame = NULL;
+    int i;
+
+    // find a free buffer
+    for (i = 0; i < 5; i++)
+        if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT] &&
+            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
+            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
+            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
+            frame = &s->frames[i];
+            break;
+        }
+    if (i == 5) { /* cannot happen given the pool size; guard against state corruption */
+        av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
+        abort();
+    }
+    if (frame->tf.f->data[0]) /* slot still holds stale data: release before reuse */
+        vp8_release_frame(s, frame);
+
+    return frame;
+}
+
+static av_always_inline
+int update_dimensions(VP8Context *s, int width, int height, int is_vp7) /* (re)allocate per-frame MB buffers for the given size; 0 or AVERROR */
+{
+    AVCodecContext *avctx = s->avctx;
+    int i, ret;
+
+    /* note: '&&' binds tighter than '||' — flush/realloc when the display size changed,
+     * or when the size in MBs changed while buffers already exist */
+    if (width != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
+        height != s->avctx->height) {
+        vp8_decode_flush_impl(s->avctx, 1);
+
+        ret = ff_set_dimensions(s->avctx, width, height);
+        if (ret < 0)
+            return ret;
+    }
+
+    s->mb_width = (s->avctx->coded_width + 15) / 16;
+    s->mb_height = (s->avctx->coded_height + 15) / 16;
+
+    /* again precedence-sensitive: VP7 always uses the "layout" mode, VP8 only with real slice threading */
+    s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
+                   avctx->thread_count > 1;
+    if (!s->mb_layout) { // Frame threading and one thread
+        s->macroblocks_base = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
+                                         sizeof(*s->macroblocks));
+        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
+    } else // Sliced threading
+        s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
+                                         sizeof(*s->macroblocks));
+    s->top_nnz = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
+    s->top_border = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
+    s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));
+
+    if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
+        !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) { /* intra4x4 buffer only required in non-layout mode */
+        free_buffers(s);
+        return AVERROR(ENOMEM);
+    }
+
+    for (i = 0; i < MAX_THREADS; i++) {
+        s->thread_data[i].filter_strength =
+            av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
+        if (!s->thread_data[i].filter_strength) {
+            free_buffers(s);
+            return AVERROR(ENOMEM);
+        }
+#if HAVE_THREADS
+        pthread_mutex_init(&s->thread_data[i].lock, NULL);
+        pthread_cond_init(&s->thread_data[i].cond, NULL);
+#endif
+    }
+
+    s->macroblocks = s->macroblocks_base + 1; /* skip the guard element at index 0 */
+
+    return 0;
+}
+
+static int vp7_update_dimensions(VP8Context *s, int width, int height) /* thin wrapper selecting the VP7 path */
+{
+    return update_dimensions(s, width, height, IS_VP7);
+}
+
+static int vp8_update_dimensions(VP8Context *s, int width, int height) /* thin wrapper selecting the VP8 path */
+{
+    return update_dimensions(s, width, height, IS_VP8);
+}
+
+
+static void parse_segment_info(VP8Context *s) /* read segmentation feature data and segment-id tree probabilities from the header coder */
+{
+    VP56RangeCoder *c = &s->c;
+    int i;
+
+    s->segmentation.update_map = vp8_rac_get(c);
+
+    if (vp8_rac_get(c)) { // update segment feature data
+        s->segmentation.absolute_vals = vp8_rac_get(c); /* absolute vs. delta quant/filter values */
+
+        for (i = 0; i < 4; i++)
+            s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
+
+        for (i = 0; i < 4; i++)
+            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
+    }
+    if (s->segmentation.update_map)
+        for (i = 0; i < 3; i++) /* 3 probs for the 4-leaf segment-id tree; absent => 255 */
+            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
+}
+
+static void update_lf_deltas(VP8Context *s) /* read per-reference and per-mode loop-filter deltas (6-bit magnitude + sign) */
+{
+    VP56RangeCoder *c = &s->c;
+    int i;
+
+    for (i = 0; i < 4; i++) {
+        if (vp8_rac_get(c)) { /* delta present? */
+            s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
+
+            if (vp8_rac_get(c)) /* sign bit */
+                s->lf_delta.ref[i] = -s->lf_delta.ref[i];
+        }
+    }
+
+    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
+        if (vp8_rac_get(c)) {
+            s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
+
+            if (vp8_rac_get(c))
+                s->lf_delta.mode[i] = -s->lf_delta.mode[i];
+        }
+    }
+}
+
+static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size) /* init 1/2/4/8 coefficient-partition range coders; -1 on truncated input */
+{
+    const uint8_t *sizes = buf; /* first (n-1) partition sizes are stored up front as LE24 */
+    int i;
+
+    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
+
+    buf += 3 * (s->num_coeff_partitions - 1); /* skip past the size table */
+    buf_size -= 3 * (s->num_coeff_partitions - 1);
+    if (buf_size < 0)
+        return -1;
+
+    for (i = 0; i < s->num_coeff_partitions - 1; i++) {
+        int size = AV_RL24(sizes + 3 * i);
+        if (buf_size - size < 0)
+            return -1;
+
+        ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
+        buf += size;
+        buf_size -= size;
+    }
+    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size); /* last partition takes the remainder */
+
+    return 0;
+}
+
+static void vp7_get_quants(VP8Context *s) /* read VP7 quantizer indices; each optional index defaults to yac_qi */
+{
+    VP56RangeCoder *c = &s->c;
+
+    int yac_qi = vp8_rac_get_uint(c, 7);
+    int ydc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
+    int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
+    int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
+    int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
+    int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
+
+    s->qmat[0].luma_qmul[0] = vp7_ydc_qlookup[ydc_qi]; /* VP7 uses a single qmat (no segmentation), index 0 */
+    s->qmat[0].luma_qmul[1] = vp7_yac_qlookup[yac_qi];
+    s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
+    s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
+    s->qmat[0].chroma_qmul[0] = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132); /* chroma DC capped at 132 */
+    s->qmat[0].chroma_qmul[1] = vp7_yac_qlookup[uvac_qi];
+}
+
+static void vp8_get_quants(VP8Context *s) /* read VP8 base quantizer + deltas, fill per-segment dequant tables */
+{
+    VP56RangeCoder *c = &s->c;
+    int i, base_qi;
+
+    int yac_qi = vp8_rac_get_uint(c, 7);
+    int ydc_delta = vp8_rac_get_sint(c, 4);
+    int y2dc_delta = vp8_rac_get_sint(c, 4);
+    int y2ac_delta = vp8_rac_get_sint(c, 4);
+    int uvdc_delta = vp8_rac_get_sint(c, 4);
+    int uvac_delta = vp8_rac_get_sint(c, 4);
+
+    for (i = 0; i < 4; i++) { /* one qmat per segment */
+        if (s->segmentation.enabled) {
+            base_qi = s->segmentation.base_quant[i];
+            if (!s->segmentation.absolute_vals) /* delta mode: segment value offsets the frame quantizer */
+                base_qi += yac_qi;
+        } else
+            base_qi = yac_qi;
+
+        s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta, 7)];
+        s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi, 7)];
+        s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)] * 2;
+        /* 101581>>16 is equivalent to 155/100 */
+        s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] * 101581 >> 16;
+        s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
+        s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
+
+        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8); /* floor of 8 for Y2 AC */
+        s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132); /* chroma DC capped at 132 */
+    }
+}
+
+/**
+ * Determine which buffers golden and altref should be updated with after this frame.
+ * The spec isn't clear here, so I'm going by my understanding of what libvpx does
+ *
+ * Intra frames update all 3 references
+ * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
+ * If the update (golden|altref) flag is set, it's updated with the current frame
+ * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
+ * If the flag is not set, the number read means:
+ * 0: no update
+ * 1: VP56_FRAME_PREVIOUS
+ * 2: update golden with altref, or update altref with golden
+ */
+static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
+{
+    VP56RangeCoder *c = &s->c;
+
+    if (update)
+        return VP56_FRAME_CURRENT;
+
+    switch (vp8_rac_get_uint(c, 2)) {
+    case 1:
+        return VP56_FRAME_PREVIOUS;
+    case 2: /* cross-copy: golden <- altref, or altref <- golden */
+        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
+    }
+    return VP56_FRAME_NONE; /* case 0 (and 3): no update */
+}
+
+static void vp78_reset_probability_tables(VP8Context *s) /* restore default token probabilities, expanded from per-band defaults */
+{
+    int i, j;
+    for (i = 0; i < 4; i++)
+        for (j = 0; j < 16; j++) /* vp8_coeff_band maps the 16 coeff positions onto bands */
+            memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
+                   sizeof(s->prob->token[i][j]));
+}
+
+static void vp78_update_probability_tables(VP8Context *s) /* conditionally read new token probs; each update is replicated across its band's positions */
+{
+    VP56RangeCoder *c = &s->c;
+    int i, j, k, l, m;
+
+    for (i = 0; i < 4; i++)
+        for (j = 0; j < 8; j++)
+            for (k = 0; k < 3; k++)
+                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
+                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
+                        int prob = vp8_rac_get_uint(c, 8);
+                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++) /* -1 terminates the index list */
+                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
+                    }
+}
+
+#define VP7_MVC_SIZE 17 /* number of MV component probabilities per axis (VP7) */
+#define VP8_MVC_SIZE 19 /* ditto for VP8 */
+
+static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
+                                                            int mvc_size) /* optional intra pred prob updates + MV prob updates */
+{
+    VP56RangeCoder *c = &s->c;
+    int i, j;
+
+    if (vp8_rac_get(c)) /* 16x16 intra mode probs present? */
+        for (i = 0; i < 4; i++)
+            s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
+    if (vp8_rac_get(c)) /* chroma 8x8 intra mode probs present? */
+        for (i = 0; i < 3; i++)
+            s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
+
+    // 17.2 MV probability update
+    for (i = 0; i < 2; i++) /* i: MV component (row/column) */
+        for (j = 0; j < mvc_size; j++)
+            if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
+                s->prob->mvc[i][j] = vp8_rac_get_nn(c); /* value forced nonzero */
+}
+
+static void update_refs(VP8Context *s) /* decide post-frame golden/altref updates (see ref_to_update) */
+{
+    VP56RangeCoder *c = &s->c;
+
+    int update_golden = vp8_rac_get(c);
+    int update_altref = vp8_rac_get(c);
+
+    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
+    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
+}
+
+static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height) /* copy planes 1 and 2 at half width/height (4:2:0 layout) */
+{
+    int i, j;
+
+    for (j = 1; j < 3; j++) { /* chroma planes only; luma (plane 0) untouched */
+        for (i = 0; i < height / 2; i++)
+            memcpy(dst->data[j] + i * dst->linesize[j],
+                   src->data[j] + i * src->linesize[j], width / 2);
+    }
+}
+
+static void fade(uint8_t *dst, int dst_linesize,
+                 const uint8_t *src, int src_linesize,
+                 int width, int height,
+                 int alpha, int beta) /* per-pixel: clip(y + y*beta/256 + alpha); beta scales, alpha offsets */
+{
+    int i, j;
+    for (j = 0; j < height; j++) {
+        for (i = 0; i < width; i++) {
+            uint8_t y = src[j * src_linesize + i];
+            dst[j * dst_linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
+        }
+    }
+}
+
+static int vp7_fade_frame(VP8Context *s, VP56RangeCoder *c) /* apply VP7 fade (luma only) to the previous frame; 0 or AVERROR */
+{
+    int alpha = (int8_t) vp8_rac_get_uint(c, 8); /* signed 8-bit offset */
+    int beta = (int8_t) vp8_rac_get_uint(c, 8); /* signed 8-bit scale */
+    int ret;
+
+    if (!s->keyframe && (alpha || beta)) { /* nothing to do on keyframes or identity fade */
+        int width = s->mb_width * 16;
+        int height = s->mb_height * 16;
+        AVFrame *src, *dst;
+
+        if (!s->framep[VP56_FRAME_PREVIOUS] ||
+            !s->framep[VP56_FRAME_GOLDEN]) {
+            av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        dst =
+        src = s->framep[VP56_FRAME_PREVIOUS]->tf.f; /* default: fade in place */
+
+        /* preserve the golden frame, write a new previous frame */
+        if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
+            s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
+            if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
+                return ret;
+
+            dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
+
+            copy_chroma(dst, src, width, height); /* chroma is not faded, just copied */
+        }
+
+        fade(dst->data[0], dst->linesize[0],
+             src->data[0], src->linesize[0],
+             width, height, alpha, beta);
+    }
+
+    return 0;
+}
+
/**
 * Decode the VP7 frame header: the uncompressed leading bytes plus the
 * first ("header") arithmetic partition, filling the decoder context.
 *
 * @param s        VP7/VP8 decoder context
 * @param buf      start of the frame data
 * @param buf_size number of bytes available at buf
 * @return 0 on success, a negative AVERROR on malformed input
 */
static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int part1_size, hscale, vscale, i, j, ret;
    int width = s->avctx->width;
    int height = s->avctx->height;

    /* need at least the 4-byte uncompressed header */
    if (buf_size < 4) {
        return AVERROR_INVALIDDATA;
    }

    s->profile = (buf[0] >> 1) & 7;
    if (s->profile > 1) {
        avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
        return AVERROR_INVALIDDATA;
    }

    /* lowest bit of the first byte is the inverse keyframe flag */
    s->keyframe = !(buf[0] & 1);
    s->invisible = 0;
    part1_size = AV_RL24(buf) >> 4;

    if (buf_size < 4 - s->profile + part1_size) {
        av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed : %d\n", buf_size, 4 - s->profile + part1_size);
        return AVERROR_INVALIDDATA;
    }

    /* profile 1 uses one header byte less than profile 0 */
    buf += 4 - s->profile;
    buf_size -= 4 - s->profile;

    /* VP7 always uses the 6-tap epel filters (never bilinear) */
    memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));

    ff_vp56_init_range_decoder(c, buf, part1_size);
    buf += part1_size;
    buf_size -= part1_size;

    /* A. Dimension information (keyframes only) */
    if (s->keyframe) {
        width = vp8_rac_get_uint(c, 12);
        height = vp8_rac_get_uint(c, 12);
        hscale = vp8_rac_get_uint(c, 2);
        vscale = vp8_rac_get_uint(c, 2);
        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        /* keyframes reset all probability tables to their defaults */
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        for (i = 0; i < 2; i++)
            memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
                   sizeof(vp7_mv_default_prob[i]));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
        memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
    }

    if (s->keyframe || s->profile > 0)
        memset(s->inter_dc_pred, 0 , sizeof(s->inter_dc_pred));

    /* B. Decoding information for all four macroblock-level features */
    for (i = 0; i < 4; i++) {
        s->feature_enabled[i] = vp8_rac_get(c);
        if (s->feature_enabled[i]) {
            s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);

            for (j = 0; j < 3; j++)
                s->feature_index_prob[i][j] =
                    vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;

            /* per-profile value bit widths; width 0 means no values coded */
            if (vp7_feature_value_size[s->profile][i])
                for (j = 0; j < 4; j++)
                    s->feature_value[i][j] =
                        vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
        }
    }

    /* VP7 never uses segmentation or loop-filter deltas */
    s->segmentation.enabled = 0;
    s->segmentation.update_map = 0;
    s->lf_delta.enabled = 0;

    /* VP7 stores all coefficients in a single partition */
    s->num_coeff_partitions = 1;
    ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);

    /* (re)allocate per-frame buffers when the geometry changes */
    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
        if ((ret = vp7_update_dimensions(s, width, height)) < 0)
            return ret;
    }

    /* C. Dequantization indices */
    vp7_get_quants(s);

    /* D. Golden frame update flag (a Flag) for interframes only */
    if (!s->keyframe) {
        s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
        s->sign_bias[VP56_FRAME_GOLDEN] = 0;
    }

    s->update_last = 1;
    s->update_probabilities = 1;
    s->fade_present = 1;

    if (s->profile > 0) {
        s->update_probabilities = vp8_rac_get(c);
        /* not persisting this frame's probabilities: keep a pristine copy */
        if (!s->update_probabilities)
            s->prob[1] = s->prob[0];

        if (!s->keyframe)
            s->fade_present = vp8_rac_get(c);
    }

    /* E. Fading information for previous frame */
    if (s->fade_present && vp8_rac_get(c)) {
        if ((ret = vp7_fade_frame(s ,c)) < 0)
            return ret;
    }

    /* F. Loop filter type */
    if (!s->profile)
        s->filter.simple = vp8_rac_get(c);

    /* G. DCT coefficient ordering specification */
    if (vp8_rac_get(c))
        for (i = 1; i < 16; i++)
            s->prob[0].scan[i] = ff_zigzag_scan[vp8_rac_get_uint(c, 4)];

    /* H. Loop filter levels */
    if (s->profile > 0)
        s->filter.simple = vp8_rac_get(c);
    s->filter.level = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
    vp78_update_probability_tables(s);

    /* macroblock-skip coding is never used for VP7 */
    s->mbskip_enabled = 0;

    /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
    if (!s->keyframe) {
        s->prob->intra = vp8_rac_get_uint(c, 8);
        s->prob->last = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
    }

    return 0;
}
+
/**
 * Decode the VP8 frame header: the 3-byte uncompressed frame tag, the
 * keyframe start code and dimensions (if present), and the first
 * ("header") arithmetic partition.
 *
 * @param s        decoder context
 * @param buf      start of the frame data
 * @param buf_size number of bytes available at buf
 * @return 0 on success, a negative AVERROR on malformed input
 */
static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, ret;
    int width = s->avctx->width;
    int height = s->avctx->height;

    if (buf_size < 3) {
        av_log(s->avctx, AV_LOG_ERROR, "Insufficent data (%d) for header\n", buf_size);
        return AVERROR_INVALIDDATA;
    }

    /* 3-byte uncompressed frame tag */
    s->keyframe = !(buf[0] & 1);
    s->profile = (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size = AV_RL24(buf) >> 5;
    buf += 3;
    buf_size -= 3;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    /* profile selects the subpel interpolation filter set */
    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
               sizeof(s->put_pixels_tab));
    else // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
               sizeof(s->put_pixels_tab));

    /* keyframes consume 7 extra bytes before the header partition */
    if (header_size > buf_size - 7 * s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR,
                   "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        /* 14-bit dimensions plus 2-bit upscaling factors */
        width = AV_RL16(buf + 3) & 0x3fff;
        height = AV_RL16(buf + 5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf += 7;
        buf_size -= 7;

        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        /* keyframes reset all probability tables to their defaults */
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc, vp8_mv_default_prob,
               sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    ff_vp56_init_range_decoder(c, buf, header_size);
    buf += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        s->colorspace = vp8_rac_get(c);
        if (s->colorspace)
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        s->fullrange = vp8_rac_get(c);
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple = vp8_rac_get(c);
    s->filter.level = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))
        if (vp8_rac_get(c))
            update_lf_deltas(s);

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    /* (re)allocate per-frame buffers when the geometry changes */
    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height)
        if ((ret = vp8_update_dimensions(s, width, height)) < 0)
            return ret;

    vp8_get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    vp78_update_probability_tables(s);

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    /* inter-frame-only syntax: intra/ref probabilities and MV prob updates */
    if (!s->keyframe) {
        s->prob->intra = vp8_rac_get_uint(c, 8);
        s->prob->last = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
    }

    return 0;
}
+
+static av_always_inline
+void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
+{
+ dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
+ av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
+ dst->y = av_clip(src->y, av_clip(s->mv_min.y, INT16_MIN, INT16_MAX),
+ av_clip(s->mv_max.y, INT16_MIN, INT16_MAX));
+}
+
/**
 * Motion vector coding, 17.1.
 *
 * Read one signed MV component from the range coder: either a "large"
 * magnitude coded bit-by-bit, or a small magnitude coded via a tree,
 * followed by a sign bit for nonzero values.
 *
 * @param c   range coder
 * @param p   probability table for this MV component
 * @param vp7 nonzero for VP7 (7 magnitude bits instead of 9)
 * @return the signed component value
 */
static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
{
    int bit, x = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        int i;

        /* large magnitude: low 3 bits first, then the high bits down to bit 4 */
        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = (vp7 ? 7 : 9); i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        /* bit 3: forced to 1 when no higher bit is set, otherwise coded */
        if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        // small_mvtree
        const uint8_t *ps = p + 2;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + 3 * bit;
        x += 4 * bit;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + bit;
        x += 2 * bit;
        x += vp56_rac_get_prob(c, *ps);
    }

    /* sign bit is only coded for nonzero magnitudes */
    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}
+
/* VP7 specialization of read_mv_component() (7 magnitude bits). */
static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    return read_mv_component(c, p, 1);
}
+
/* VP8 specialization of read_mv_component() (9 magnitude bits). */
static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    return read_mv_component(c, p, 0);
}
+
+static av_always_inline
+const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
+{
+ if (is_vp7)
+ return vp7_submv_prob;
+
+ if (left == top)
+ return vp8_submv_prob[4 - !!left];
+ if (!top)
+ return vp8_submv_prob[2];
+ return vp8_submv_prob[1 - !!left];
+}
+
/**
 * Split motion vector prediction, 16.4.
 *
 * Reads the macroblock partitioning (16x8, 8x16, 8x8 or 4x4) and one
 * motion vector per partition, using the left/above neighbouring
 * subblock vectors as coding context.
 *
 * @returns the number of motion vectors parsed (2, 4 or 16)
 */
static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                    int layout, int is_vp7)
{
    int part_idx;
    int n, num;
    VP8Macroblock *top_mb;
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
    const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
    VP56mv *top_mv;
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv = mb->bmv;

    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[2];
    else
        top_mb = &mb[-s->mb_width - 1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv = top_mb->bmv;

    /* read the partitioning mode from its small tree */
    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        else
            part_idx = VP8_SPLITMVMODE_8x8;
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx],
    firstidx = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

    for (n = 0; n < num; n++) {
        int k = firstidx[n];
        uint32_t left, above;
        const uint8_t *submv_prob;

        /* fetch the left/above neighbouring subblock MVs used as context;
         * subblocks on the MB edge read from the neighbouring macroblock */
        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);

        submv_prob = get_submv_prob(left, above, is_vp7);

        /* sub-MV mode: copy left, copy above, zero, or a new delta MV */
        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y +
                                   read_mv_component(c, s->prob->mvc[0], is_vp7);
                    mb->bmv[n].x = mb->mv.x +
                                   read_mv_component(c, s->prob->mvc[1], is_vp7);
                } else {
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
            AV_WN32A(&mb->bmv[n], left);
        }
    }

    return num;
}
+
/**
 * The vp7 reference decoder uses a padding macroblock column (added to right
 * edge of the frame) to guard against illegal macroblock offsets. The
 * algorithm has bugs that permit offsets to straddle the padding column.
 * This function replicates those bugs.
 *
 * @param[out] edge_x macroblock x address
 * @param[out] edge_y macroblock y address
 *
 * @return macroblock offset legal (boolean)
 */
static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
                                   int xoffset, int yoffset, int boundary,
                                   int *edge_x, int *edge_y)
{
    /* virtual width including the padding column */
    const int vwidth = mb_width + 1;
    /* linear index of the candidate macroblock in the padded grid */
    const int cand   = (mb_y + yoffset) * vwidth + mb_x + xoffset;

    /* reject candidates above the boundary or inside the padding column */
    if (cand < boundary || cand % vwidth == vwidth - 1)
        return 0;

    *edge_y = cand / vwidth;
    *edge_x = cand % vwidth;
    return 1;
}
+
+static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
+{
+ return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
+}
+
/**
 * VP7 motion vector decoding: accumulate scores for the zero/nearest/near
 * candidates over the fixed list of neighbouring predictor positions, then
 * read the MV mode (zero / nearest / near / new / split) and the vector.
 */
static av_always_inline
void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[12];
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    VP56mv near_mv[3];
    uint8_t cnt[3] = { 0 };
    VP56RangeCoder *c = &s->c;
    int i;

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
        const VP7MVPred * pred = &vp7_mv_pred[i];
        int edge_x, edge_y;

        /* only use predictor positions whose (bug-replicated, see
         * vp7_calculate_mb_offset) address is legal */
        if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
                                    pred->yoffset, !s->profile, &edge_x, &edge_y)) {
            VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
                                             ? s->macroblocks_base + 1 + edge_x +
                                               (s->mb_width + 1) * (edge_y + 1)
                                             : s->macroblocks + edge_x +
                                               (s->mb_height - edge_y - 1) * 2;
            uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
            if (mv) {
                /* classify the neighbour MV as nearest, near, or skip it
                 * if a third distinct value shows up */
                if (AV_RN32A(&near_mv[CNT_NEAREST])) {
                    if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
                        idx = CNT_NEAREST;
                    } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
                        if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
                            continue;
                        idx = CNT_NEAR;
                    } else {
                        AV_WN32A(&near_mv[CNT_NEAR], mv);
                        idx = CNT_NEAR;
                    }
                } else {
                    AV_WN32A(&near_mv[CNT_NEAREST], mv);
                    idx = CNT_NEAREST;
                }
            } else {
                idx = CNT_ZERO;
            }
        } else {
            idx = CNT_ZERO;
        }
        /* each predictor position contributes a fixed score weight */
        cnt[idx] += vp7_mv_pred[i].score;
    }

    mb->partitioning = VP8_SPLITMVMODE_NONE;

    /* MV mode tree, conditioned on the candidate scores */
    if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {

            if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {

                /* base prediction for a new/split MV: the best-scoring
                 * candidate, or zero if the zero score dominates */
                if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
                else
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR] ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));

                if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
                } else {
                    /* new MV: coded as deltas from the prediction */
                    mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                mb->mv = near_mv[CNT_NEAR];
                mb->bmv[0] = mb->mv;
            }
        } else {
            mb->mv = near_mv[CNT_NEAREST];
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}
+
/**
 * VP8 motion vector decoding, 16.3: gather zero/nearest/near candidates
 * from the top, left and top-left neighbours (with sign-bias correction
 * across reference frames), then decode the MV mode and the vector.
 */
static av_always_inline
void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[3] = { 0 /* top */,
                                  mb - 1 /* left */,
                                  0 /* top-left */ };
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

    /* top/top-left neighbour location depends on the macroblock layout */
    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
    } else {
        mb_edge[0] = mb - s->mb_width - 1;
        mb_edge[2] = mb - s->mb_width - 2;
    }

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* Process MB on top, left and top-left */
#define MV_EDGE_CHECK(n) \
    { \
        VP8Macroblock *edge = mb_edge[n]; \
        int edge_ref = edge->ref_frame; \
        if (edge_ref != VP56_FRAME_CURRENT) { \
            uint32_t mv = AV_RN32A(&edge->mv); \
            if (mv) { \
                if (cur_sign_bias != sign_bias[edge_ref]) { \
                    /* SWAR negate of the values in mv. */ \
                    mv = ~mv; \
                    mv = ((mv & 0x7fff7fff) + \
                          0x00010001) ^ (mv & 0x80008000); \
                } \
                if (!n || mv != AV_RN32A(&near_mv[idx])) \
                    AV_WN32A(&near_mv[++idx], mv); \
                cnt[idx] += 1 + (n != 2); \
            } else \
                cnt[CNT_ZERO] += 1 + (n != 2); \
        } \
    }

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] &&
            AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                /* split-MV context: how many neighbours used split mode */
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
                                   (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
                } else {
                    /* new MV: coded as deltas from the prediction */
                    mb->mv.y += vp8_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += vp8_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
                mb->bmv[0] = mb->mv;
            }
        } else {
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}
+
/**
 * Decode the 16 4x4 luma intra prediction modes of an I4x4 macroblock.
 * Keyframes condition each mode on the modes above and to the left;
 * inter frames use a single fixed probability table for every position.
 */
static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                           int mb_x, int keyframe, int layout)
{
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

    /* with the per-MB layout, pull the top context from the MB above */
    if (layout) {
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
    }
    if (keyframe) {
        int x, y;
        uint8_t *top;
        uint8_t *const left = s->intra4x4_pred_mode_left;
        if (layout)
            top = mb->intra4x4_pred_mode_top;
        else
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                const uint8_t *ctx;
                ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                /* the decoded mode becomes context for the next row/column */
                left[y] = top[x] = *intra4x4;
                intra4x4++;
            }
        }
    } else {
        int i;
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
                                           vp8_pred4x4_prob_inter);
    }
}
+
/**
 * Decode per-macroblock mode information: segment id, skip flag, the
 * intra/inter decision, prediction modes and (for inter MBs) the motion
 * vectors.
 *
 * @param segment in/out: previous segment id, updated from the bitstream
 *                or propagated from the reference map
 * @param ref     segmentation map entry of the reference frame, or NULL
 */
static av_always_inline
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
{
    VP56RangeCoder *c = &s->c;
    static const char *vp7_feature_name[] = { "q-index",
                                              "lf-delta",
                                              "partial-golden-update",
                                              "blit-pitch" };
    if (is_vp7) {
        int i;
        *segment = 0;
        /* VP7 macroblock features: presence is decoded and logged only,
         * no further handling happens here */
        for (i = 0; i < 4; i++) {
            if (s->feature_enabled[i]) {
                if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
                    int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
                                                 s->feature_index_prob[i]);
                    av_log(s->avctx, AV_LOG_WARNING,
                           "Feature %s present in macroblock (value 0x%x)\n",
                           vp7_feature_name[i], s->feature_value[i][index]);
                }
            }
        }
    } else if (s->segmentation.update_map) {
        /* 2-bit segment id coded as a small tree */
        int bit = vp56_rac_get_prob(c, s->prob->segmentid[0]);
        *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
    } else if (s->segmentation.enabled)
        *segment = ref ? *ref : *segment;
    mb->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
                                    vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
        } else {
            /* replicate the 16x16 mode into the 4x4 context buffers */
            const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
                                           : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
            if (s->mb_layout)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            else
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A(s->intra4x4_pred_mode_left, modes);
        }

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                vp8_pred8x8c_prob_intra);
        mb->ref_frame = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        // inter MB, 16.2
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame =
                (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
                                                                   : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame - 1]++;

        // motion vectors, 16.3
        if (is_vp7)
            vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
        else
            vp8_decode_mvs(s, mb, mb_x, mb_y, layout);
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                s->prob->pred8x8c);
        mb->ref_frame = VP56_FRAME_CURRENT;
        mb->partitioning = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
    }
}
+
/**
 * @param r arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param token_prob probability set for the first token read
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @param scan coefficient scan order
 * @param vp7 nonzero when decoding VP7 (affects EOB handling after runs of zeros)
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
                                 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                 int i, uint8_t *token_prob, int16_t qmul[2],
                                 const uint8_t scan[16], int vp7)
{
    /* work on a local copy of the coder, written back on exit
     * (presumably to help the compiler keep it in registers) */
    VP56RangeCoder c = *r;
    goto skip_eob;
    do {
        int coeff;
restart:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
            break;

skip_eob:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
            if (++i == 16)
                break; // invalid input; blocks should end with EOB
            token_prob = probs[i][0];
            /* VP7 allows an EOB check after a zero run, VP8 does not */
            if (vp7)
                goto restart;
            goto skip_eob;
        }

        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
            coeff = 1;
            token_prob = probs[i + 1][1];
        } else {
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
                if (coeff)
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
                coeff += 2;
            } else {
                // DCT_CAT*
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
                    } else { // DCT_CAT2
                        coeff = 7;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
                    }
                } else { // DCT_CAT3 and up
                    int a = vp56_rac_get_prob(&c, token_prob[8]);
                    int b = vp56_rac_get_prob(&c, token_prob[9 + a]);
                    int cat = (a << 1) + b;
                    coeff = 3 + (8 << cat);
                    coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
                }
            }
            token_prob = probs[i + 1][2];
        }
        /* sign bit, then dequantize (index 0 is DC, 1+ are AC) */
        block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
    } while (++i < 16);

    *r = c;
    return i;
}
+
+static av_always_inline
+int inter_predict_dc(int16_t block[16], int16_t pred[2])
+{
+ int16_t dc = block[0];
+ int ret = 0;
+
+ if (pred[1] > 3) {
+ dc += pred[0];
+ ret = 1;
+ }
+
+ if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) {
+ block[0] = pred[0] = dc;
+ pred[1] = 0;
+ } else {
+ if (pred[0] == dc)
+ pred[1]++;
+ block[0] = pred[0] = dc;
+ }
+
+ return ret;
+}
+
/* VP7 specialization of decode_block_coeffs_internal() (custom scan order). */
static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
                                            int16_t block[16],
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                            int i, uint8_t *token_prob,
                                            int16_t qmul[2],
                                            const uint8_t scan[16])
{
    return decode_block_coeffs_internal(r, block, probs, i,
                                        token_prob, qmul, scan, IS_VP7);
}
+
/* VP8 specialization of decode_block_coeffs_internal() (fixed zigzag scan).
 * The guard allows arch-specific code to provide an optimized replacement. */
#ifndef vp8_decode_block_coeffs_internal
static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
                                            int16_t block[16],
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                            int i, uint8_t *token_prob,
                                            int16_t qmul[2])
{
    return decode_block_coeffs_internal(r, block, probs, i,
                                        token_prob, qmul, ff_zigzag_scan, IS_VP8);
}
#endif
+
/**
 * @param c arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for number of surrounding
 *                   all-zero blocks (only left/top, so 0-2)
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @param scan scan pattern (VP7 only)
 * @param vp7 nonzero when decoding VP7
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
                        uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                        int i, int zero_nhood, int16_t qmul[2],
                        const uint8_t scan[16], int vp7)
{
    uint8_t *token_prob = probs[i][zero_nhood];
    /* fast path: an immediate EOB means an empty block */
    if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
        return 0;
    return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
                                                  token_prob, qmul, scan)
               : vp8_decode_block_coeffs_internal(c, block, probs, i,
                                                  token_prob, qmul);
}
+
/**
 * Decode all DCT coefficients of one macroblock: the optional luma DC
 * (WHT) block, 16 luma blocks and 8 chroma blocks, while maintaining the
 * left/top non-zero contexts and the non_zero_count_cache consumed by the
 * IDCT stage.
 *
 * @param t_nnz top non-zero context (per column, [8] is the DC context)
 * @param l_nnz left non-zero context (per row, [8] is the DC context)
 */
static av_always_inline
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
                      VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
                      int is_vp7)
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = mb->segment;
    int block_dc = 0;

    /* modes with a separate DC block: everything except I4x4 and
     * (for VP8) split-MV macroblocks */
    if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
                                  nnz_pred, s->qmat[segment].luma_dc_qmul,
                                  ff_zigzag_scan, is_vp7);
        l_nnz[8] = t_nnz[8] = !!nnz;

        if (is_vp7 && mb->mode > MODE_I4x4) {
            nnz |= inter_predict_dc(td->block_dc,
                                    s->inter_dc_pred[mb->ref_frame - 1]);
        }

        if (nnz) {
            nnz_total += nnz;
            block_dc = 1;
            /* inverse Walsh-Hadamard transform scatters the DCs into the
             * 16 luma blocks */
            if (nnz == 1)
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
            else
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
        }
        /* luma AC blocks then start at coeff 1 and use the no-DC context */
        luma_start = 1;
        luma_ctx = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, td->block[y][x],
                                      s->prob->token[luma_ctx],
                                      luma_start, nnz_pred,
                                      s->qmat[segment].luma_qmul,
                                      s->prob[0].scan, is_vp7);
            /* nnz+block_dc may be one more than the actual last index,
             * but we don't care */
            td->non_zero_count_cache[y][x] = nnz + block_dc;
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
                nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
                                          s->prob->token[2], 0, nnz_pred,
                                          s->qmat[segment].chroma_qmul,
                                          s->prob[0].scan, is_vp7);
                td->non_zero_count_cache[i][(y << 1) + x] = nnz;
                t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}
+
+static av_always_inline
+void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
+ uint8_t *src_cb, uint8_t *src_cr,
+ int linesize, int uvlinesize, int simple)
+{
+ AV_COPY128(top_border, src_y + 15 * linesize);
+ if (!simple) {
+ AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
+ AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
+ }
+}
+
/**
 * Exchange (xchg=1) or copy back (xchg=0) pixel spans between the row
 * above the current macroblock and the saved top-border buffer.
 * NOTE(review): presumably this lets intra prediction see the saved
 * (pre-loop-filter) top/top-left edge — confirm against the callers in
 * intra_predict(), which invoke it before and after prediction.
 */
static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
                    uint8_t *src_cr, int linesize, int uvlinesize, int mb_x,
                    int mb_y, int mb_width, int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border - 32; // for TL prediction
    src_y -= linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

/* swap when xchg is set, otherwise one-way copy into b */
#define XCHG(a, b, xchg) \
    do { \
        if (xchg) \
            AV_SWAP64(b, a); \
        else \
            AV_COPY64(b, a); \
    } while (0)

    XCHG(top_border_m1 + 8, src_y - 8, xchg);
    XCHG(top_border, src_y, xchg);
    /* spans past the current MB are always copied, never swapped */
    XCHG(top_border + 8, src_y + 8, 1);
    if (mb_x < mb_width - 1)
        XCHG(top_border + 32, src_y + 16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1 + 16, src_cb - 8, xchg);
        XCHG(top_border_m1 + 24, src_cr - 8, xchg);
        XCHG(top_border + 16, src_cb, 1);
        XCHG(top_border + 24, src_cr, 1);
    }
}
+
+static av_always_inline
+int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
+{
+ if (!mb_x)
+ return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
+ else
+ return mb_y ? mode : LEFT_DC_PRED8x8;
+}
+
+static av_always_inline
+int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
+{
+ if (!mb_x)
+ return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
+ else
+ return mb_y ? mode : HOR_PRED8x8;
+}
+
+static av_always_inline
+int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
+{
+ switch (mode) {
+ case DC_PRED8x8:
+ return check_dc_pred8x8_mode(mode, mb_x, mb_y);
+ case VERT_PRED8x8:
+ return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
+ case HOR_PRED8x8:
+ return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
+ case PLANE_PRED8x8: /* TM */
+ return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
+ }
+ return mode;
+}
+
+static av_always_inline
+int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
+{
+ if (!mb_x) {
+ return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
+ } else {
+ return mb_y ? mode : HOR_VP8_PRED;
+ }
+}
+
/**
 * Replace a 4x4 intra mode with its border-safe variant at the picture
 * edges; some modes keep their mode but request prediction into a local
 * copy buffer (*copy_buf = 1) with synthesized edge pixels instead.
 */
static av_always_inline
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
                                     int *copy_buf, int vp7)
{
    switch (mode) {
    case VERT_PRED:
        if (!mb_x && mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case DIAG_DOWN_LEFT_PRED:
    case VERT_LEFT_PRED:
        return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
    case HOR_PRED:
        if (!mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case HOR_UP_PRED:
        return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
    case TM_VP8_PRED:
        return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
    case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
                   * as 16x16/8x8 DC */
    case DIAG_DOWN_RIGHT_PRED:
    case VERT_RIGHT_PRED:
    case HOR_DOWN_PRED:
        /* these modes keep their identity but need the copy buffer at
         * any picture edge */
        if (!mb_y || !mb_x)
            *copy_buf = 1;
        return mode;
    }
    return mode;
}
+
/**
 * Perform intra prediction for one macroblock: 16x16 or per-4x4 luma
 * prediction (including the IDCT/add of the 4x4 residuals) plus 8x8
 * chroma prediction, with emulated-edge fixups at picture borders.
 *
 * @param dst per-plane destination pointers (Y, Cb, Cr)
 */
static av_always_inline
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
{
    int x, y, mode, nnz;
    uint32_t tr;

    /* for the first row, we need to run xchg_mb_border to init the top edge
     * to 127 otherwise, skip it if we aren't going to deblock */
    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

    if (mb->mode < MODE_I4x4) {
        /* whole-macroblock 16x16 luma prediction */
        mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
        /* edge fill values differ between VP7 (128) and VP8 (127/129) */
        const uint8_t lo = is_vp7 ? 128 : 127;
        const uint8_t hi = is_vp7 ? 128 : 129;
        uint8_t tr_top[4] = { lo, lo, lo, lo };

        // all blocks on the right edge of the macroblock use bottom edge
        // the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
        if (mb_y && mb_x == s->mb_width - 1) {
            tr = tr_right[-1] * 0x01010101u;
            tr_right = (uint8_t *) &tr;
        }

        if (mb->skip)
            AV_ZERO128(td->non_zero_count_cache);

        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
                int copy = 0, linesize = s->linesize;
                uint8_t *dst = ptr + 4 * x;
                LOCAL_ALIGNED(4, uint8_t, copy_dst, [5 * 8]);

                if ((y == 0 || x == 3) && mb_y == 0) {
                    topright = tr_top;
                } else if (x == 3)
                    topright = tr_right;

                mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
                                                        mb_y + y, &copy, is_vp7);
                if (copy) {
                    /* predict into a small bounce buffer whose top/left
                     * edges are synthesized for the picture border */
                    dst = copy_dst + 12;
                    linesize = 8;
                    if (!(mb_y + y)) {
                        copy_dst[3] = lo;
                        AV_WN32A(copy_dst + 4, lo * 0x01010101U);
                    } else {
                        AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
                        if (!(mb_x + x)) {
                            copy_dst[3] = hi;
                        } else {
                            copy_dst[3] = ptr[4 * x - s->linesize - 1];
                        }
                    }
                    if (!(mb_x + x)) {
                        copy_dst[11] =
                        copy_dst[19] =
                        copy_dst[27] =
                        copy_dst[35] = hi;
                    } else {
                        copy_dst[11] = ptr[4 * x - 1];
                        copy_dst[19] = ptr[4 * x + s->linesize - 1];
                        copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
                        copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
                    }
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
                    /* copy the predicted 4x4 block back into the frame */
                    AV_COPY32(ptr + 4 * x, copy_dst + 12);
                    AV_COPY32(ptr + 4 * x + s->linesize, copy_dst + 20);
                    AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
                    AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
                }

                /* add the residual for this subblock */
                nnz = td->non_zero_count_cache[y][x];
                if (nnz) {
                    if (nnz == 1)
                        s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
                                                  td->block[y][x], s->linesize);
                    else
                        s->vp8dsp.vp8_idct_add(ptr + 4 * x,
                                               td->block[y][x], s->linesize);
                }
                topright += 4;
            }

            ptr += 4 * s->linesize;
            intra4x4 += 4;
        }
    }

    /* chroma: both planes share the same 8x8 mode */
    mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
                                            mb_x, mb_y, is_vp7);
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);

    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
}
+
+/* Per-subpel-position lookups (index = 3-bit fractional MV position)
+ * used by the luma/chroma MC wrappers below. */
+static const uint8_t subpel_idx[3][8] = {
+ { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
+ // also function pointer index
+ { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
+ { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
+};
+
+/**
+ * luma MC function
+ *
+ * @param s VP8 decoding context
+ * @param td per-thread data (provides edge_emu_buffer scratch)
+ * @param dst target buffer for block data at block position
+ * @param ref reference picture buffer at origin (0, 0)
+ * @param mv motion vector (relative to block position) to get pixel data from
+ * @param x_off horizontal position of block from origin (0, 0)
+ * @param y_off vertical position of block from origin (0, 0)
+ * @param block_w width of block (16, 8 or 4)
+ * @param block_h height of block (always same as block_w)
+ * @param width width of src/dst plane data
+ * @param height height of src/dst plane data
+ * @param linesize size of a single line of plane data, including padding
+ * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
+ */
+static av_always_inline
+void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
+ ThreadFrame *ref, const VP56mv *mv,
+ int x_off, int y_off, int block_w, int block_h,
+ int width, int height, ptrdiff_t linesize,
+ vp8_mc_func mc_func[3][3])
+{
+ uint8_t *src = ref->f->data[0];
+
+ /* zero MV (all four packed components) -> plain copy fast path below */
+ if (AV_RN32A(mv)) {
+ int src_linesize = linesize;
+
+ int mx = (mv->x * 2) & 7, mx_idx = subpel_idx[0][mx];
+ int my = (mv->y * 2) & 7, my_idx = subpel_idx[0][my];
+
+ x_off += mv->x >> 2;
+ y_off += mv->y >> 2;
+
+ // edge emulation
+ /* frame threading: wait until the reference rows the subpel filter
+ * will read (including its bottom margin) are decoded; >>4 = mb rows */
+ ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
+ src += y_off * linesize + x_off;
+ /* block (plus filter margins) overlaps a frame edge: redo MC from a
+ * padded copy in edge_emu_buffer */
+ if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
+ y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
+ s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
+ src - my_idx * linesize - mx_idx,
+ EDGE_EMU_LINESIZE, linesize,
+ block_w + subpel_idx[1][mx],
+ block_h + subpel_idx[1][my],
+ x_off - mx_idx, y_off - my_idx,
+ width, height);
+ src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
+ src_linesize = EDGE_EMU_LINESIZE;
+ }
+ mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
+ } else {
+ ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
+ mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
+ linesize, block_h, 0, 0);
+ }
+}
+
+/**
+ * chroma MC function
+ *
+ * Same scheme as vp8_mc_luma(), but operates on both chroma planes at
+ * once (they share the MV) and uses 1/8-pel MV precision.
+ *
+ * @param s VP8 decoding context
+ * @param td per-thread data (provides edge_emu_buffer scratch)
+ * @param dst1 target buffer for block data at block position (U plane)
+ * @param dst2 target buffer for block data at block position (V plane)
+ * @param ref reference picture buffer at origin (0, 0)
+ * @param mv motion vector (relative to block position) to get pixel data from
+ * @param x_off horizontal position of block from origin (0, 0)
+ * @param y_off vertical position of block from origin (0, 0)
+ * @param block_w width of block (16, 8 or 4)
+ * @param block_h height of block (always same as block_w)
+ * @param width width of src/dst plane data
+ * @param height height of src/dst plane data
+ * @param linesize size of a single line of plane data, including padding
+ * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
+ */
+static av_always_inline
+void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
+ uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
+ int x_off, int y_off, int block_w, int block_h,
+ int width, int height, ptrdiff_t linesize,
+ vp8_mc_func mc_func[3][3])
+{
+ uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
+
+ if (AV_RN32A(mv)) {
+ int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
+ int my = mv->y & 7, my_idx = subpel_idx[0][my];
+
+ x_off += mv->x >> 3;
+ y_off += mv->y >> 3;
+
+ // edge emulation
+ src1 += y_off * linesize + x_off;
+ src2 += y_off * linesize + x_off;
+ /* >>3: chroma plane is half-height, so 8 chroma rows per mb row */
+ ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
+ if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
+ y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
+ s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
+ src1 - my_idx * linesize - mx_idx,
+ EDGE_EMU_LINESIZE, linesize,
+ block_w + subpel_idx[1][mx],
+ block_h + subpel_idx[1][my],
+ x_off - mx_idx, y_off - my_idx, width, height);
+ src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
+ mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
+
+ /* the single edge_emu_buffer is reused for V, so U must be
+ * filtered before V is emulated */
+ s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
+ src2 - my_idx * linesize - mx_idx,
+ EDGE_EMU_LINESIZE, linesize,
+ block_w + subpel_idx[1][mx],
+ block_h + subpel_idx[1][my],
+ x_off - mx_idx, y_off - my_idx, width, height);
+ src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
+ mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
+ } else {
+ mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
+ mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
+ }
+ } else {
+ ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
+ mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
+ mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
+ }
+}
+
+/**
+ * Motion-compensate one partition: the luma sub-block at (bx_off, by_off)
+ * plus the corresponding half-size chroma sub-blocks, all using one MV.
+ */
+static av_always_inline
+void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
+ ThreadFrame *ref_frame, int x_off, int y_off,
+ int bx_off, int by_off, int block_w, int block_h,
+ int width, int height, VP56mv *mv)
+{
+ VP56mv uvmv = *mv;
+
+ /* Y */
+ vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
+ ref_frame, mv, x_off + bx_off, y_off + by_off,
+ block_w, block_h, width, height, s->linesize,
+ s->put_pixels_tab[block_w == 8]);
+
+ /* U/V */
+ if (s->profile == 3) {
+ /* this block only applies VP8; it is safe to check
+ * only the profile, as VP7 profile <= 1 */
+ uvmv.x &= ~7;
+ uvmv.y &= ~7;
+ }
+ /* chroma is subsampled 2x in both directions */
+ x_off >>= 1;
+ y_off >>= 1;
+ bx_off >>= 1;
+ by_off >>= 1;
+ width >>= 1;
+ height >>= 1;
+ block_w >>= 1;
+ block_h >>= 1;
+ vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
+ dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
+ &uvmv, x_off + bx_off, y_off + by_off,
+ block_w, block_h, width, height, s->uvlinesize,
+ s->put_pixels_tab[1 + (block_w == 4)]);
+}
+
+/* Fetch pixels for estimated mv 4 macroblocks ahead.
+ * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion.
+ * `ref` is a VP56_FRAME_* index into s->framep[]. */
+static av_always_inline
+void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
+ int mb_xy, int ref)
+{
+ /* Don't prefetch refs that haven't been used very often this frame. */
+ /* (heuristic: require more uses than mb_xy/32 macroblocks so far) */
+ if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
+ int x_off = mb_x << 4, y_off = mb_y << 4;
+ int mx = (mb->mv.x >> 2) + x_off + 8;
+ int my = (mb->mv.y >> 2) + y_off;
+ uint8_t **src = s->framep[ref]->tf.f->data;
+ int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
+ /* For threading, a ff_thread_await_progress here might be useful, but
+ * it actually slows down the decoder. Since a bad prefetch doesn't
+ * generate bad decoder output, we don't run it here. */
+ s->vdsp.prefetch(src[0] + off, s->linesize, 4);
+ off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
+ s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
+ }
+}
+
+/**
+ * Apply motion vectors to prediction buffer, chapter 18.
+ *
+ * Dispatches on the macroblock partitioning: whole-block, 16x8, 8x16,
+ * 8x8 (via vp8_mc_part) or fully split 4x4 (handled inline, since the
+ * chroma MVs must be averaged from the four covering luma MVs).
+ */
+static av_always_inline
+void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
+ VP8Macroblock *mb, int mb_x, int mb_y)
+{
+ int x_off = mb_x << 4, y_off = mb_y << 4;
+ int width = 16 * s->mb_width, height = 16 * s->mb_height;
+ ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
+ VP56mv *bmv = mb->bmv;
+
+ switch (mb->partitioning) {
+ case VP8_SPLITMVMODE_NONE:
+ vp8_mc_part(s, td, dst, ref, x_off, y_off,
+ 0, 0, 16, 16, width, height, &mb->mv);
+ break;
+ case VP8_SPLITMVMODE_4x4: {
+ int x, y;
+ VP56mv uvmv;
+
+ /* Y */
+ for (y = 0; y < 4; y++) {
+ for (x = 0; x < 4; x++) {
+ vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
+ ref, &bmv[4 * y + x],
+ 4 * x + x_off, 4 * y + y_off, 4, 4,
+ width, height, s->linesize,
+ s->put_pixels_tab[2]);
+ }
+ }
+
+ /* U/V */
+ x_off >>= 1;
+ y_off >>= 1;
+ width >>= 1;
+ height >>= 1;
+ for (y = 0; y < 2; y++) {
+ for (x = 0; x < 2; x++) {
+ /* chroma MV = average of the four covering luma MVs,
+ * rounded to nearest (the FF_SIGNBIT term adjusts the
+ * rounding bias for negative sums) */
+ uvmv.x = mb->bmv[2 * y * 4 + 2 * x ].x +
+ mb->bmv[2 * y * 4 + 2 * x + 1].x +
+ mb->bmv[(2 * y + 1) * 4 + 2 * x ].x +
+ mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
+ uvmv.y = mb->bmv[2 * y * 4 + 2 * x ].y +
+ mb->bmv[2 * y * 4 + 2 * x + 1].y +
+ mb->bmv[(2 * y + 1) * 4 + 2 * x ].y +
+ mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
+ uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
+ uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
+ /* profile 3: full-pel chroma MVs (VP8 only, cf. vp8_mc_part) */
+ if (s->profile == 3) {
+ uvmv.x &= ~7;
+ uvmv.y &= ~7;
+ }
+ vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
+ dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
+ &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
+ width, height, s->uvlinesize,
+ s->put_pixels_tab[2]);
+ }
+ }
+ break;
+ }
+ case VP8_SPLITMVMODE_16x8:
+ vp8_mc_part(s, td, dst, ref, x_off, y_off,
+ 0, 0, 16, 8, width, height, &bmv[0]);
+ vp8_mc_part(s, td, dst, ref, x_off, y_off,
+ 0, 8, 16, 8, width, height, &bmv[1]);
+ break;
+ case VP8_SPLITMVMODE_8x16:
+ vp8_mc_part(s, td, dst, ref, x_off, y_off,
+ 0, 0, 8, 16, width, height, &bmv[0]);
+ vp8_mc_part(s, td, dst, ref, x_off, y_off,
+ 8, 0, 8, 16, width, height, &bmv[1]);
+ break;
+ case VP8_SPLITMVMODE_8x8:
+ vp8_mc_part(s, td, dst, ref, x_off, y_off,
+ 0, 0, 8, 8, width, height, &bmv[0]);
+ vp8_mc_part(s, td, dst, ref, x_off, y_off,
+ 8, 0, 8, 8, width, height, &bmv[1]);
+ vp8_mc_part(s, td, dst, ref, x_off, y_off,
+ 0, 8, 8, 8, width, height, &bmv[2]);
+ vp8_mc_part(s, td, dst, ref, x_off, y_off,
+ 8, 8, 8, 8, width, height, &bmv[3]);
+ break;
+ }
+}
+
+/**
+ * Add the inverse transform of all coded blocks to the prediction:
+ * luma (skipped for MODE_I4x4, whose residual was already added in
+ * intra_predict) and both chroma planes.
+ *
+ * non_zero_count_cache rows are read 4 bytes at a time (AV_RL32), so
+ * the low byte of nnz4 is the leftmost block's coefficient count.
+ */
+static av_always_inline
+void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
+{
+ int x, y, ch;
+
+ if (mb->mode != MODE_I4x4) {
+ uint8_t *y_dst = dst[0];
+ for (y = 0; y < 4; y++) {
+ uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
+ if (nnz4) {
+ /* any byte > 1 means some block has AC coefficients;
+ * otherwise all four blocks are DC-only (fast path) */
+ if (nnz4 & ~0x01010101) {
+ for (x = 0; x < 4; x++) {
+ if ((uint8_t) nnz4 == 1)
+ s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
+ td->block[y][x],
+ s->linesize);
+ else if ((uint8_t) nnz4 > 1)
+ s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
+ td->block[y][x],
+ s->linesize);
+ nnz4 >>= 8;
+ if (!nnz4)
+ break;
+ }
+ } else {
+ s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
+ }
+ }
+ y_dst += 4 * s->linesize;
+ }
+ }
+
+ for (ch = 0; ch < 2; ch++) {
+ uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
+ if (nnz4) {
+ uint8_t *ch_dst = dst[1 + ch];
+ if (nnz4 & ~0x01010101) {
+ for (y = 0; y < 2; y++) {
+ for (x = 0; x < 2; x++) {
+ if ((uint8_t) nnz4 == 1)
+ s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
+ td->block[4 + ch][(y << 1) + x],
+ s->uvlinesize);
+ else if ((uint8_t) nnz4 > 1)
+ s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
+ td->block[4 + ch][(y << 1) + x],
+ s->uvlinesize);
+ nnz4 >>= 8;
+ if (!nnz4)
+ goto chroma_idct_end;
+ }
+ ch_dst += 4 * s->uvlinesize;
+ }
+ } else {
+ s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
+ }
+ }
+chroma_idct_end:
+ ;
+ }
+}
+
+/**
+ * Compute the per-macroblock loop-filter parameters: base level from the
+ * frame/segment (absolute or delta), adjusted by reference/mode deltas,
+ * clamped to [0, 63]; interior limit derated by the sharpness setting;
+ * and whether inner (sub-block) edges should be filtered at all.
+ */
+static av_always_inline
+void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
+ VP8FilterStrength *f, int is_vp7)
+{
+ int interior_limit, filter_level;
+
+ if (s->segmentation.enabled) {
+ filter_level = s->segmentation.filter_level[mb->segment];
+ if (!s->segmentation.absolute_vals)
+ filter_level += s->filter.level;
+ } else
+ filter_level = s->filter.level;
+
+ if (s->lf_delta.enabled) {
+ filter_level += s->lf_delta.ref[mb->ref_frame];
+ filter_level += s->lf_delta.mode[mb->mode];
+ }
+
+ /* clamp to 6 bits (0..63) */
+ filter_level = av_clip_uintp2(filter_level, 6);
+
+ interior_limit = filter_level;
+ if (s->filter.sharpness) {
+ interior_limit >>= (s->filter.sharpness + 3) >> 2;
+ interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
+ }
+ interior_limit = FFMAX(interior_limit, 1);
+
+ f->filter_level = filter_level;
+ f->inner_limit = interior_limit;
+ /* VP8 skips inner edges for skipped whole-block predictions */
+ f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
+ mb->mode == VP8_MVMODE_SPLIT;
+}
+
+/**
+ * Normal (non-simple) loop filter for one macroblock: macroblock-edge
+ * and inner-edge filtering of both luma and chroma. Note the inner
+ * horizontal pass runs before the vertical edges for VP8 and after
+ * them for VP7 — see the two H_LOOP_FILTER_16Y_INNER invocations.
+ */
+static av_always_inline
+void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
+ int mb_x, int mb_y, int is_vp7)
+{
+ int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
+ int filter_level = f->filter_level;
+ int inner_limit = f->inner_limit;
+ int inner_filter = f->inner_filter;
+ int linesize = s->linesize;
+ int uvlinesize = s->uvlinesize;
+ /* high-edge-variance threshold, indexed [keyframe][filter_level] */
+ static const uint8_t hev_thresh_lut[2][64] = {
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3 },
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2 }
+ };
+
+ if (!filter_level)
+ return;
+
+ /* edge limits: VP7 and VP8 derive them differently from the level */
+ if (is_vp7) {
+ bedge_lim_y = filter_level;
+ bedge_lim_uv = filter_level * 2;
+ mbedge_lim = filter_level + 2;
+ } else {
+ bedge_lim_y =
+ bedge_lim_uv = filter_level * 2 + inner_limit;
+ mbedge_lim = bedge_lim_y + 4;
+ }
+
+ hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
+
+ /* left macroblock edge (skipped on the frame's left border) */
+ if (mb_x) {
+ s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
+ mbedge_lim, inner_limit, hev_thresh);
+ s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
+ mbedge_lim, inner_limit, hev_thresh);
+ }
+
+#define H_LOOP_FILTER_16Y_INNER(cond) \
+ if (cond && inner_filter) { \
+ s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, \
+ bedge_lim_y, inner_limit, \
+ hev_thresh); \
+ s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, \
+ bedge_lim_y, inner_limit, \
+ hev_thresh); \
+ s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \
+ bedge_lim_y, inner_limit, \
+ hev_thresh); \
+ s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \
+ uvlinesize, bedge_lim_uv, \
+ inner_limit, hev_thresh); \
+ }
+
+ H_LOOP_FILTER_16Y_INNER(!is_vp7)
+
+ /* top macroblock edge (skipped on the frame's top border) */
+ if (mb_y) {
+ s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
+ mbedge_lim, inner_limit, hev_thresh);
+ s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
+ mbedge_lim, inner_limit, hev_thresh);
+ }
+
+ if (inner_filter) {
+ s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
+ linesize, bedge_lim_y,
+ inner_limit, hev_thresh);
+ s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
+ linesize, bedge_lim_y,
+ inner_limit, hev_thresh);
+ s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
+ linesize, bedge_lim_y,
+ inner_limit, hev_thresh);
+ s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
+ dst[2] + 4 * uvlinesize,
+ uvlinesize, bedge_lim_uv,
+ inner_limit, hev_thresh);
+ }
+
+ H_LOOP_FILTER_16Y_INNER(is_vp7)
+}
+
+/**
+ * "Simple" loop filter for one macroblock: luma only, macroblock edges
+ * plus inner edges every 4 pixels, no high-edge-variance threshold.
+ */
+static av_always_inline
+void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
+ int mb_x, int mb_y)
+{
+ int mbedge_lim, bedge_lim;
+ int filter_level = f->filter_level;
+ int inner_limit = f->inner_limit;
+ int inner_filter = f->inner_filter;
+ int linesize = s->linesize;
+
+ if (!filter_level)
+ return;
+
+ bedge_lim = 2 * filter_level + inner_limit;
+ mbedge_lim = bedge_lim + 4;
+
+ /* left/top mb edges are skipped on the frame borders */
+ if (mb_x)
+ s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
+ if (inner_filter) {
+ s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim);
+ s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim);
+ s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
+ }
+
+ if (mb_y)
+ s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
+ if (inner_filter) {
+ s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim);
+ s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim);
+ s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
+ }
+}
+
+/* MV clamp margin: one macroblock's worth in the units mv_min/mv_max
+ * use (16 << 2) — presumably quarter-pel; confirm against vp8.h */
+#define MARGIN (16 << 2)
+/**
+ * First pass for the two-pass layout (mb_layout == 1): decode all
+ * macroblock modes and motion vectors for the whole frame before any
+ * pixel reconstruction starts. mv_min/mv_max are slid by one mb (64)
+ * per iteration so decode_mb_mode sees position-relative clamp bounds.
+ */
+static av_always_inline
+void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
+ VP8Frame *prev_frame, int is_vp7)
+{
+ VP8Context *s = avctx->priv_data;
+ int mb_x, mb_y;
+
+ s->mv_min.y = -MARGIN;
+ s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
+ for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
+ VP8Macroblock *mb = s->macroblocks_base +
+ ((s->mb_width + 1) * (mb_y + 1) + 1);
+ int mb_xy = mb_y * s->mb_width;
+
+ /* left-of-frame 4x4 modes predict from DC */
+ AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
+
+ s->mv_min.x = -MARGIN;
+ s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
+ for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
+ /* top row: initialise the (virtual) above-frame top predictors */
+ if (mb_y == 0)
+ AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
+ DC_PRED * 0x01010101);
+ decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
+ prev_frame && prev_frame->seg_map ?
+ prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
+ s->mv_min.x -= 64;
+ s->mv_max.x -= 64;
+ }
+ s->mv_min.y -= 64;
+ s->mv_max.y -= 64;
+ }
+}
+
+/* VP7 entry point for the mode/MV pre-pass. */
+static void vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
+ VP8Frame *prev_frame)
+{
+ vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
+}
+
+/* VP8 entry point for the mode/MV pre-pass. */
+static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
+ VP8Frame *prev_frame)
+{
+ vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
+}
+
+#if HAVE_THREADS
+/* Sliced-threading synchronisation. Macroblock positions are packed as
+ * (mb_y << 16) | mb_x so a single integer compare orders (row, column)
+ * pairs. check_thread_pos() blocks the calling thread (td) until the
+ * other thread (otd) has published a position >= (mb_x_check, mb_y_check). */
+#define check_thread_pos(td, otd, mb_x_check, mb_y_check) \
+ do { \
+ int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \
+ if (otd->thread_mb_pos < tmp) { \
+ pthread_mutex_lock(&otd->lock); \
+ td->wait_mb_pos = tmp; \
+ do { \
+ if (otd->thread_mb_pos >= tmp) \
+ break; \
+ pthread_cond_wait(&otd->cond, &otd->lock); \
+ } while (1); \
+ td->wait_mb_pos = INT_MAX; \
+ pthread_mutex_unlock(&otd->lock); \
+ } \
+ } while (0)
+
+/* Publish this thread's decode position and wake neighbours, but only
+ * when sliced threading is active and a neighbour could actually be
+ * waiting on a position we have now passed. Relies on avctx, num_jobs,
+ * next_td and prev_td being in scope at the expansion site. */
+#define update_pos(td, mb_y, mb_x) \
+ do { \
+ int pos = (mb_y << 16) | (mb_x & 0xFFFF); \
+ int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
+ (num_jobs > 1); \
+ int is_null = !next_td || !prev_td; \
+ int pos_check = (is_null) ? 1 \
+ : (next_td != td && \
+ pos >= next_td->wait_mb_pos) || \
+ (prev_td != td && \
+ pos >= prev_td->wait_mb_pos); \
+ td->thread_mb_pos = pos; \
+ if (sliced_threading && pos_check) { \
+ pthread_mutex_lock(&td->lock); \
+ pthread_cond_broadcast(&td->cond); \
+ pthread_mutex_unlock(&td->lock); \
+ } \
+ } while (0)
+#else
+/* single-threaded builds: the synchronisation points become no-ops */
+#define check_thread_pos(td, otd, mb_x_check, mb_y_check) while(0)
+#define update_pos(td, mb_y, mb_x) while(0)
+#endif
+
+/**
+ * Decode (entropy-decode, predict and idct) one macroblock row, without
+ * loop filtering. Under sliced threading each job owns every
+ * num_jobs-th row and synchronises against the rows above and below
+ * via check_thread_pos()/update_pos().
+ */
+static av_always_inline void decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
+ int jobnr, int threadnr, int is_vp7)
+{
+ VP8Context *s = avctx->priv_data;
+ VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
+ int mb_y = td->thread_mb_pos >> 16;
+ int mb_x, mb_xy = mb_y * s->mb_width;
+ int num_jobs = s->num_jobs;
+ VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
+ /* coefficient partitions are assigned to rows round-robin */
+ VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
+ VP8Macroblock *mb;
+ uint8_t *dst[3] = {
+ curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
+ curframe->tf.f->data[1] + 8 * mb_y * s->uvlinesize,
+ curframe->tf.f->data[2] + 8 * mb_y * s->uvlinesize
+ };
+ /* neighbouring rows belong to adjacent jobs (round-robin); frame
+ * borders synchronise against ourselves, i.e. not at all */
+ if (mb_y == 0)
+ prev_td = td;
+ else
+ prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
+ if (mb_y == s->mb_height - 1)
+ next_td = td;
+ else
+ next_td = &s->thread_data[(jobnr + 1) % num_jobs];
+ if (s->mb_layout == 1)
+ mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
+ else {
+ // Make sure the previous frame has read its segmentation map,
+ // if we re-use the same map.
+ if (prev_frame && s->segmentation.enabled &&
+ !s->segmentation.update_map)
+ ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
+ mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
+ memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
+ AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
+ }
+
+ /* VP7 carries left nnz across rows; VP8 resets every row */
+ if (!is_vp7 || mb_y == 0)
+ memset(td->left_nnz, 0, sizeof(td->left_nnz));
+
+ s->mv_min.x = -MARGIN;
+ s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
+
+ for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
+ // Wait for previous thread to read mb_x+2, and reach mb_y-1.
+ if (prev_td != td) {
+ if (threadnr != 0) {
+ check_thread_pos(td, prev_td,
+ mb_x + (is_vp7 ? 2 : 1),
+ mb_y - (is_vp7 ? 2 : 1));
+ } else {
+ /* thread 0 must also wait for the row above to finish its
+ * filtering pass (offset by mb_width + 3, cf. filter_mb_row) */
+ check_thread_pos(td, prev_td,
+ mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
+ mb_y - (is_vp7 ? 2 : 1));
+ }
+ }
+
+ s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
+ s->linesize, 4);
+ s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
+ dst[2] - dst[1], 2);
+
+ /* single-pass layout: modes/MVs are decoded inline with pixels */
+ if (!s->mb_layout)
+ decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
+ prev_frame && prev_frame->seg_map ?
+ prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
+
+ prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
+
+ if (!mb->skip)
+ decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);
+
+ if (mb->mode <= MODE_I4x4)
+ intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
+ else
+ inter_predict(s, td, dst, mb, mb_x, mb_y);
+
+ prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
+
+ if (!mb->skip) {
+ idct_mb(s, td, dst, mb);
+ } else {
+ AV_ZERO64(td->left_nnz);
+ AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
+
+ /* Reset DC block predictors if they would exist
+ * if the mb had coefficients */
+ if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
+ td->left_nnz[8] = 0;
+ s->top_nnz[mb_x][8] = 0;
+ }
+ }
+
+ if (s->deblock_filter)
+ filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);
+
+ /* with multiple jobs the last thread saves the unfiltered border
+ * rows for the row below before filtering overwrites them */
+ if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
+ if (s->filter.simple)
+ backup_mb_border(s->top_border[mb_x + 1], dst[0],
+ NULL, NULL, s->linesize, 0, 1);
+ else
+ backup_mb_border(s->top_border[mb_x + 1], dst[0],
+ dst[1], dst[2], s->linesize, s->uvlinesize, 0);
+ }
+
+ prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
+
+ dst[0] += 16;
+ dst[1] += 8;
+ dst[2] += 8;
+ s->mv_min.x -= 64;
+ s->mv_max.x -= 64;
+
+ /* NOTE(review): mb_x < s->mb_width inside this loop, so this first
+ * branch looks unreachable — confirm against upstream history */
+ if (mb_x == s->mb_width + 1) {
+ update_pos(td, mb_y, s->mb_width + 3);
+ } else {
+ update_pos(td, mb_y, mb_x);
+ }
+ }
+}
+
+/* VP7 row-decode trampoline (installed in s->decode_mb_row_no_filter).
+ * Uses IS_VP7 rather than a bare 1 for consistency with the other
+ * vp7_/vp8_ wrappers in this file. */
+static void vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
+ int jobnr, int threadnr)
+{
+ decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, IS_VP7);
+}
+
+/* VP8 row-decode trampoline (installed in s->decode_mb_row_no_filter).
+ * Uses IS_VP8 rather than a bare 0 for consistency with the other
+ * vp7_/vp8_ wrappers in this file. */
+static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
+ int jobnr, int threadnr)
+{
+ decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, IS_VP8);
+}
+
+/**
+ * Loop-filter one macroblock row (simple or normal filter), run after
+ * the row and enough of its neighbours have been reconstructed.
+ * Positions published via update_pos() are offset by mb_width + 3 to
+ * distinguish the filter pass from the decode pass of the same row.
+ */
+static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
+ int jobnr, int threadnr, int is_vp7)
+{
+ VP8Context *s = avctx->priv_data;
+ VP8ThreadData *td = &s->thread_data[threadnr];
+ int mb_x, mb_y = td->thread_mb_pos >> 16, num_jobs = s->num_jobs;
+ AVFrame *curframe = s->curframe->tf.f;
+ VP8Macroblock *mb;
+ VP8ThreadData *prev_td, *next_td;
+ uint8_t *dst[3] = {
+ curframe->data[0] + 16 * mb_y * s->linesize,
+ curframe->data[1] + 8 * mb_y * s->uvlinesize,
+ curframe->data[2] + 8 * mb_y * s->uvlinesize
+ };
+
+ if (s->mb_layout == 1)
+ mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
+ else
+ mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
+
+ /* frame borders synchronise against ourselves, i.e. not at all */
+ if (mb_y == 0)
+ prev_td = td;
+ else
+ prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
+ if (mb_y == s->mb_height - 1)
+ next_td = td;
+ else
+ next_td = &s->thread_data[(jobnr + 1) % num_jobs];
+
+ for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
+ VP8FilterStrength *f = &td->filter_strength[mb_x];
+ /* wait for the row above to be filtered and the row below decoded */
+ if (prev_td != td)
+ check_thread_pos(td, prev_td,
+ (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
+ if (next_td != td)
+ if (next_td != &s->thread_data[0])
+ check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);
+
+ /* single job: save the unfiltered border rows here (with multiple
+ * jobs the decode pass already did it) */
+ if (num_jobs == 1) {
+ if (s->filter.simple)
+ backup_mb_border(s->top_border[mb_x + 1], dst[0],
+ NULL, NULL, s->linesize, 0, 1);
+ else
+ backup_mb_border(s->top_border[mb_x + 1], dst[0],
+ dst[1], dst[2], s->linesize, s->uvlinesize, 0);
+ }
+
+ if (s->filter.simple)
+ filter_mb_simple(s, dst[0], f, mb_x, mb_y);
+ else
+ filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
+ dst[0] += 16;
+ dst[1] += 8;
+ dst[2] += 8;
+
+ update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
+ }
+}
+
+/* VP7 filter-row trampoline (installed in s->filter_mb_row).
+ * Uses IS_VP7 rather than a bare 1 for consistency with the other
+ * vp7_/vp8_ wrappers in this file. */
+static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
+ int jobnr, int threadnr)
+{
+ filter_mb_row(avctx, tdata, jobnr, threadnr, IS_VP7);
+}
+
+/* VP8 filter-row trampoline (installed in s->filter_mb_row).
+ * Uses IS_VP8 rather than a bare 0 for consistency with the other
+ * vp7_/vp8_ wrappers in this file. */
+static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
+ int jobnr, int threadnr)
+{
+ filter_mb_row(avctx, tdata, jobnr, threadnr, IS_VP8);
+}
+
+/**
+ * execute2() worker: decode (and, if enabled, filter) every
+ * num_jobs-th macroblock row belonging to this job, reporting frame
+ * progress after each row for frame-threaded consumers.
+ */
+static av_always_inline
+int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
+ int threadnr, int is_vp7)
+{
+ VP8Context *s = avctx->priv_data;
+ VP8ThreadData *td = &s->thread_data[jobnr];
+ VP8ThreadData *next_td = NULL, *prev_td = NULL;
+ VP8Frame *curframe = s->curframe;
+ int mb_y, num_jobs = s->num_jobs;
+
+ td->thread_nr = threadnr;
+ for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
+ /* NOTE(review): the loop condition already bounds mb_y; this
+ * check is redundant */
+ if (mb_y >= s->mb_height)
+ break;
+ td->thread_mb_pos = mb_y << 16;
+ s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
+ if (s->deblock_filter)
+ s->filter_mb_row(avctx, tdata, jobnr, threadnr);
+ /* mark the whole row done for any waiter */
+ update_pos(td, mb_y, INT_MAX & 0xFFFF);
+
+ s->mv_min.y -= 64;
+ s->mv_max.y -= 64;
+
+ if (avctx->active_thread_type == FF_THREAD_FRAME)
+ ff_thread_report_progress(&curframe->tf, mb_y, 0);
+ }
+
+ return 0;
+}
+
+/* VP7 sliced-decode entry point, passed to avctx->execute2(). */
+static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
+ int jobnr, int threadnr)
+{
+ return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
+}
+
+/* VP8 sliced-decode entry point, passed to avctx->execute2(). */
+static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
+ int jobnr, int threadnr)
+{
+ return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
+}
+
+
+/**
+ * Decode one VP7/VP8 frame: parse the frame header, choose/allocate the
+ * target frame, honour skip_frame/skip_loop_filter, run the (possibly
+ * threaded) macroblock-row decode, then rotate the reference-frame
+ * pointers (last/golden/altref) for the next frame.
+ *
+ * Returns avpkt->size on success or a negative AVERROR; *got_frame is
+ * set only when a visible frame was produced.
+ */
+static av_always_inline
+int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
+ AVPacket *avpkt, int is_vp7)
+{
+ VP8Context *s = avctx->priv_data;
+ int ret, i, referenced, num_jobs;
+ enum AVDiscard skip_thresh;
+ VP8Frame *av_uninit(curframe), *prev_frame;
+
+ if (is_vp7)
+ ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
+ else
+ ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);
+
+ if (ret < 0)
+ goto err;
+
+ prev_frame = s->framep[VP56_FRAME_CURRENT];
+
+ /* a frame is "referenced" if last, golden or altref will point at it */
+ referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
+ s->update_altref == VP56_FRAME_CURRENT;
+
+ skip_thresh = !referenced ? AVDISCARD_NONREF
+ : !s->keyframe ? AVDISCARD_NONKEY
+ : AVDISCARD_ALL;
+
+ /* frame skipped entirely: keep the reference state unchanged */
+ if (avctx->skip_frame >= skip_thresh) {
+ s->invisible = 1;
+ memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
+ goto skip_decode;
+ }
+ s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
+
+ // release no longer referenced frames
+ for (i = 0; i < 5; i++)
+ if (s->frames[i].tf.f->data[0] &&
+ &s->frames[i] != prev_frame &&
+ &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
+ &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
+ &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
+ vp8_release_frame(s, &s->frames[i]);
+
+ curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);
+
+ if (!s->colorspace)
+ avctx->colorspace = AVCOL_SPC_BT470BG;
+ if (s->fullrange)
+ avctx->color_range = AVCOL_RANGE_JPEG;
+ else
+ avctx->color_range = AVCOL_RANGE_MPEG;
+
+ /* Given that arithmetic probabilities are updated every frame, it's quite
+ * likely that the values we have on a random interframe are complete
+ * junk if we didn't start decode on a keyframe. So just don't display
+ * anything rather than junk. */
+ if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
+ !s->framep[VP56_FRAME_GOLDEN] ||
+ !s->framep[VP56_FRAME_GOLDEN2])) {
+ av_log(avctx, AV_LOG_WARNING,
+ "Discarding interframe without a prior keyframe!\n");
+ ret = AVERROR_INVALIDDATA;
+ goto err;
+ }
+
+ curframe->tf.f->key_frame = s->keyframe;
+ curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
+ : AV_PICTURE_TYPE_P;
+ if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
+ goto err;
+
+ // check if golden and altref are swapped
+ if (s->update_altref != VP56_FRAME_NONE)
+ s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
+ else
+ s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
+
+ if (s->update_golden != VP56_FRAME_NONE)
+ s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
+ else
+ s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
+
+ if (s->update_last)
+ s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
+ else
+ s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
+
+ s->next_framep[VP56_FRAME_CURRENT] = curframe;
+
+ /* frame threading: setup is complete, the next frame may start */
+ if (avctx->codec->update_thread_context)
+ ff_thread_finish_setup(avctx);
+
+ s->linesize = curframe->tf.f->linesize[0];
+ s->uvlinesize = curframe->tf.f->linesize[1];
+
+ memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
+ /* Zero macroblock structures for top/top-left prediction
+ * from outside the frame. */
+ if (!s->mb_layout)
+ memset(s->macroblocks + s->mb_height * 2 - 1, 0,
+ (s->mb_width + 1) * sizeof(*s->macroblocks));
+ if (!s->mb_layout && s->keyframe)
+ memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
+
+ memset(s->ref_count, 0, sizeof(s->ref_count));
+
+ /* two-pass layout: decode all modes/MVs up front */
+ if (s->mb_layout == 1) {
+ // Make sure the previous frame has read its segmentation map,
+ // if we re-use the same map.
+ if (prev_frame && s->segmentation.enabled &&
+ !s->segmentation.update_map)
+ ff_thread_await_progress(&prev_frame->tf, 1, 0);
+ if (is_vp7)
+ vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
+ else
+ vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
+ }
+
+ if (avctx->active_thread_type == FF_THREAD_FRAME)
+ num_jobs = 1;
+ else
+ num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
+ s->num_jobs = num_jobs;
+ s->curframe = curframe;
+ s->prev_frame = prev_frame;
+ s->mv_min.y = -MARGIN;
+ s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
+ for (i = 0; i < MAX_THREADS; i++) {
+ s->thread_data[i].thread_mb_pos = 0;
+ s->thread_data[i].wait_mb_pos = INT_MAX;
+ }
+ if (is_vp7)
+ avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
+ num_jobs);
+ else
+ avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
+ num_jobs);
+
+ ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
+ memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
+
+skip_decode:
+ // if future frames don't use the updated probabilities,
+ // reset them to the values we saved
+ if (!s->update_probabilities)
+ s->prob[0] = s->prob[1];
+
+ if (!s->invisible) {
+ if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
+ return ret;
+ *got_frame = 1;
+ }
+
+ return avpkt->size;
+err:
+ memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
+ return ret;
+}
+
+/**
+ * Public VP8 decode entry point: thin wrapper that forwards to the
+ * shared VP7/VP8 decode path with the VP8 flavour selected.
+ */
+int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
+                        AVPacket *avpkt)
+{
+    return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
+}
+
+#if CONFIG_VP7_DECODER
+/* VP7 decode entry point: same shared decode path, VP7 flavour selected. */
+static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
+                            AVPacket *avpkt)
+{
+    return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
+}
+#endif /* CONFIG_VP7_DECODER */
+
+/**
+ * Release all decoder state.  Safe to call on a partially initialized
+ * context (it is used as the error path of vp78_decode_init): priv_data
+ * may be unset, and each per-slot AVFrame shell is freed individually.
+ */
+av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
+{
+    VP8Context *s = avctx->priv_data;
+    int i;
+
+    if (!s)
+        return 0;
+
+    /* The '1' presumably requests a full flush including allocated
+     * buffers — confirm against vp8_decode_flush_impl (defined above). */
+    vp8_decode_flush_impl(avctx, 1);
+    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
+        av_frame_free(&s->frames[i].tf.f);
+
+    return 0;
+}
+
+/* Allocate the AVFrame shells for every reference-frame slot.
+ * Returns 0 or AVERROR(ENOMEM); on failure the caller is responsible
+ * for freeing the frames already allocated (ff_vp8_decode_free). */
+static av_cold int vp8_init_frames(VP8Context *s)
+{
+    int i;
+    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
+        s->frames[i].tf.f = av_frame_alloc();
+        if (!s->frames[i].tf.f)
+            return AVERROR(ENOMEM);
+    }
+    return 0;
+}
+
+/**
+ * Shared VP7/VP8 decoder initialization: pixel format, DSP contexts,
+ * prediction functions and per-slot frame allocation.  is_vp7 selects
+ * the codec-specific row-decode/filter callbacks; being av_always_inline,
+ * the dead branch is compiled out of each wrapper.
+ */
+static av_always_inline
+int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
+{
+    VP8Context *s = avctx->priv_data;
+    int ret;
+
+    s->avctx = avctx;
+    s->vp7 = avctx->codec->id == AV_CODEC_ID_VP7;
+    avctx->pix_fmt = AV_PIX_FMT_YUV420P;
+    avctx->internal->allocate_progress = 1;
+
+    ff_videodsp_init(&s->vdsp, 8);
+
+    /* Common VP7/VP8 DSP first, then the codec-specific overrides. */
+    ff_vp78dsp_init(&s->vp8dsp);
+    if (CONFIG_VP7_DECODER && is_vp7) {
+        ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
+        ff_vp7dsp_init(&s->vp8dsp);
+        s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
+        s->filter_mb_row           = vp7_filter_mb_row;
+    } else if (CONFIG_VP8_DECODER && !is_vp7) {
+        ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
+        ff_vp8dsp_init(&s->vp8dsp);
+        s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
+        s->filter_mb_row           = vp8_filter_mb_row;
+    }
+
+    /* does not change for VP8 */
+    memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
+
+    if ((ret = vp8_init_frames(s)) < 0) {
+        ff_vp8_decode_free(avctx);
+        return ret;
+    }
+
+    return 0;
+}
+
+#if CONFIG_VP7_DECODER
+/* VP7 init entry point: shared init path with the VP7 flavour selected. */
+static int vp7_decode_init(AVCodecContext *avctx)
+{
+    return vp78_decode_init(avctx, IS_VP7);
+}
+#endif /* CONFIG_VP7_DECODER */
+
+/* Public VP8 init entry point: shared init path, VP8 flavour selected. */
+av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
+{
+    return vp78_decode_init(avctx, IS_VP8);
+}
+
+#if CONFIG_VP8_DECODER
+#if HAVE_THREADS
+/* Per-thread init for frame threading: each worker context only needs
+ * its own frame shells; DSP/prediction tables are inherited from the
+ * main context via vp8_decode_update_thread_context. */
+static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
+{
+    VP8Context *s = avctx->priv_data;
+    int ret;
+
+    s->avctx = avctx;
+
+    if ((ret = vp8_init_frames(s)) < 0) {
+        ff_vp8_decode_free(avctx);
+        return ret;
+    }
+
+    return 0;
+}
+
+#define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
+
+/**
+ * Frame-threading state hand-off: copy from the previous thread's
+ * context everything the next frame's decode depends on (entropy
+ * state, segmentation, loop-filter deltas and reference frames).
+ */
+static int vp8_decode_update_thread_context(AVCodecContext *dst,
+                                            const AVCodecContext *src)
+{
+    VP8Context *s = dst->priv_data, *s_src = src->priv_data;
+    int i;
+
+    if (s->macroblocks_base &&
+        (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
+        /* Dimensions changed: drop our per-MB arrays; they are
+         * reallocated with the new size on the next frame. */
+        free_buffers(s);
+        s->mb_width = s_src->mb_width;
+        s->mb_height = s_src->mb_height;
+    }
+
+    /* Take the probability set that survives into the next frame:
+     * prob[1] holds the saved copy when this frame's updates are
+     * transient (update_probabilities == 0). */
+    s->prob[0] = s_src->prob[!s_src->update_probabilities];
+    s->segmentation = s_src->segmentation;
+    s->lf_delta = s_src->lf_delta;
+    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
+
+    /* Re-reference every frame buffer the source still holds. */
+    for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
+        if (s_src->frames[i].tf.f->data[0]) {
+            int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
+            if (ret < 0)
+                return ret;
+        }
+    }
+
+    /* Remap the source's post-frame reference pointers into our own
+     * frames[] array (REBASE handles NULL slots). */
+    s->framep[0] = REBASE(s_src->next_framep[0]);
+    s->framep[1] = REBASE(s_src->next_framep[1]);
+    s->framep[2] = REBASE(s_src->next_framep[2]);
+    s->framep[3] = REBASE(s_src->next_framep[3]);
+
+    return 0;
+}
+#endif /* HAVE_THREADS */
+#endif /* CONFIG_VP8_DECODER */
+
+#if CONFIG_VP7_DECODER
+/* VP7: single-threaded only — no FRAME/SLICE threading capabilities. */
+AVCodec ff_vp7_decoder = {
+    .name                  = "vp7",
+    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP7"),
+    .type                  = AVMEDIA_TYPE_VIDEO,
+    .id                    = AV_CODEC_ID_VP7,
+    .priv_data_size        = sizeof(VP8Context),
+    .init                  = vp7_decode_init,
+    .close                 = ff_vp8_decode_free,
+    .decode                = vp7_decode_frame,
+    .capabilities          = AV_CODEC_CAP_DR1,
+    .flush                 = vp8_decode_flush,
+};
+#endif /* CONFIG_VP7_DECODER */
+
+#if CONFIG_VP8_DECODER
+AVCodec ff_vp8_decoder = {
+    .name                  = "vp8",
+    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
+    .type                  = AVMEDIA_TYPE_VIDEO,
+    .id                    = AV_CODEC_ID_VP8,
+    .priv_data_size        = sizeof(VP8Context),
+    .init                  = ff_vp8_decode_init,
+    .close                 = ff_vp8_decode_free,
+    .decode                = ff_vp8_decode_frame,
+    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
+                             AV_CODEC_CAP_SLICE_THREADS,
+    .flush                 = vp8_decode_flush,
+    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
+    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
+};
+#endif /* CONFIG_VP8_DECODER */
diff --git a/media/ffvpx/libavcodec/vp8.h b/media/ffvpx/libavcodec/vp8.h
new file mode 100644
index 000000000..374e1388e
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp8.h
@@ -0,0 +1,311 @@
+/*
+ * VP8 compatible video decoder
+ *
+ * Copyright (C) 2010 David Conrad
+ * Copyright (C) 2010 Ronald S. Bultje
+ * Copyright (C) 2010 Fiona Glaser
+ * Copyright (C) 2012 Daniel Kang
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VP8_H
+#define AVCODEC_VP8_H
+
+#include "libavutil/buffer.h"
+#include "libavutil/thread.h"
+
+#include "h264pred.h"
+#include "thread.h"
+#include "vp56.h"
+#include "vp8dsp.h"
+
+#define VP8_MAX_QUANT 127
+
+/* Alphabet of the DCT coefficient coder: literal magnitudes 0-4, six
+ * "category" escapes for larger magnitudes (extra bits follow, see the
+ * vp8_dct_cat*_prob tables), and an end-of-block token. */
+enum dct_token {
+    DCT_0,
+    DCT_1,
+    DCT_2,
+    DCT_3,
+    DCT_4,
+    DCT_CAT1,
+    DCT_CAT2,
+    DCT_CAT3,
+    DCT_CAT4,
+    DCT_CAT5,
+    DCT_CAT6,
+    DCT_EOB,
+
+    NUM_DCT_TOKENS
+};
+
+// used to signal 4x4 intra pred in luma MBs
+#define MODE_I4x4 4
+
+/* Whole-MB inter modes; numbered directly after the intra modes so one
+ * mode byte (VP8Macroblock.mode) can describe both. */
+enum inter_mvmode {
+    VP8_MVMODE_ZERO = MODE_I4x4 + 1,
+    VP8_MVMODE_MV,
+    VP8_MVMODE_SPLIT
+};
+
+/* Sub-MB partitionings used when the MB mode is VP8_MVMODE_SPLIT. */
+enum inter_splitmvmode {
+    VP8_SPLITMVMODE_16x8 = 0,    ///< 2 16x8 blocks (vertical)
+    VP8_SPLITMVMODE_8x16,        ///< 2 8x16 blocks (horizontal)
+    VP8_SPLITMVMODE_8x8,         ///< 2x2 blocks of 8x8px each
+    VP8_SPLITMVMODE_4x4,         ///< 4x4 blocks of 4x4px each
+    VP8_SPLITMVMODE_NONE,        ///< (only used in prediction) no split MVs
+};
+
+/* Loop-filter parameters precomputed per macroblock. */
+typedef struct VP8FilterStrength {
+    uint8_t filter_level;
+    uint8_t inner_limit;
+    uint8_t inner_filter;
+} VP8FilterStrength;
+
+/* Decoded per-macroblock mode information. */
+typedef struct VP8Macroblock {
+    uint8_t skip;
+    // TODO: make it possible to check for at least (i4x4 or split_mv)
+    // in one op. are others needed?
+    uint8_t mode;              // intra pred mode or VP8_MVMODE_*
+    uint8_t ref_frame;         // VP56_FRAME_* this MB is predicted from
+    uint8_t partitioning;      // VP8_SPLITMVMODE_* when mode is split
+    uint8_t chroma_pred_mode;
+    uint8_t segment;
+    uint8_t intra4x4_pred_mode_mb[16];
+    DECLARE_ALIGNED(4, uint8_t, intra4x4_pred_mode_top)[4];
+    VP56mv mv;                 // MB-level motion vector
+    VP56mv bmv[16];            // per-subblock MVs for split-MV MBs
+} VP8Macroblock;
+
+/* Per-worker decode state for sliced/frame threading. */
+typedef struct VP8ThreadData {
+    DECLARE_ALIGNED(16, int16_t, block)[6][4][16];
+    DECLARE_ALIGNED(16, int16_t, block_dc)[16];
+    /**
+     * This is the index plus one of the last non-zero coeff
+     * for each of the blocks in the current macroblock.
+     * So, 0 -> no coeffs
+     *     1 -> dc-only (special transform)
+     *     2+-> full transform
+     */
+    DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4];
+    /**
+     * For coeff decode, we need to know whether the above block had non-zero
+     * coefficients. This means for each macroblock, we need data for 4 luma
+     * blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9
+     * per macroblock. We keep the last row in top_nnz.
+     */
+    DECLARE_ALIGNED(8, uint8_t, left_nnz)[9];
+    int thread_nr;
+#if HAVE_THREADS
+    /* Used together with thread_mb_pos/wait_mb_pos to synchronize row
+     * progress between adjacent worker threads. */
+    pthread_mutex_t lock;
+    pthread_cond_t cond;
+#endif
+    int thread_mb_pos;  // (mb_y << 16) | (mb_x & 0xFFFF)
+    int wait_mb_pos;    // What the current thread is waiting on.
+
+#define EDGE_EMU_LINESIZE 32
+    DECLARE_ALIGNED(16, uint8_t, edge_emu_buffer)[21 * EDGE_EMU_LINESIZE];
+    VP8FilterStrength *filter_strength;
+} VP8ThreadData;
+
+/* A decoded frame plus its refcounted per-MB segmentation map. */
+typedef struct VP8Frame {
+    ThreadFrame tf;
+    AVBufferRef *seg_map;
+} VP8Frame;
+
+/* Integer MV pair; used for the mv_min/mv_max clamping range. */
+typedef struct VP8intmv {
+    int x;
+    int y;
+} VP8intmv;
+
+#define MAX_THREADS 8
+/* Main decoder context, shared between the VP7 and VP8 decoders. */
+typedef struct VP8Context {
+    VP8ThreadData *thread_data;
+    AVCodecContext *avctx;
+    /* Reference bookkeeping, indexed by VP56_FRAME_*: framep[] is the
+     * set valid while decoding the current frame, next_framep[] the set
+     * that is installed once the frame completes. */
+    VP8Frame *framep[4];
+    VP8Frame *next_framep[4];
+    VP8Frame *curframe;
+    VP8Frame *prev_frame;
+
+    uint16_t mb_width;   /* number of horizontal MB */
+    uint16_t mb_height;  /* number of vertical MB */
+    int linesize;
+    int uvlinesize;
+
+    uint8_t keyframe;
+    uint8_t deblock_filter;
+    uint8_t mbskip_enabled;
+    uint8_t profile;
+    VP8intmv mv_min;     /* clamping bounds for decoded motion vectors */
+    VP8intmv mv_max;
+
+    int8_t sign_bias[4]; ///< one state [0, 1] per ref frame type
+    int ref_count[3];
+
+    /**
+     * Base parameters for segmentation, i.e. per-macroblock parameters.
+     * These must be kept unchanged even if segmentation is not used for
+     * a frame, since the values persist between interframes.
+     */
+    struct {
+        uint8_t enabled;
+        uint8_t absolute_vals;
+        uint8_t update_map;
+        int8_t base_quant[4];
+        int8_t filter_level[4];     ///< base loop filter level
+    } segmentation;
+
+    struct {
+        uint8_t simple;
+        uint8_t level;
+        uint8_t sharpness;
+    } filter;
+
+    VP8Macroblock *macroblocks;
+
+    uint8_t *intra4x4_pred_mode_top;
+    uint8_t intra4x4_pred_mode_left[4];
+
+    /**
+     * Macroblocks can have one of 4 different quants in a frame when
+     * segmentation is enabled.
+     * If segmentation is disabled, only the first segment's values are used.
+     */
+    struct {
+        // [0] - DC qmul  [1] - AC qmul
+        int16_t luma_qmul[2];
+        int16_t luma_dc_qmul[2];    ///< luma dc-only block quant
+        int16_t chroma_qmul[2];
+    } qmat[4];
+
+    struct {
+        uint8_t enabled;    ///< whether each mb can have a different strength based on mode/ref
+
+        /**
+         * filter strength adjustment for the following macroblock modes:
+         * [0-3] - i16x16 (always zero)
+         * [4]   - i4x4
+         * [5]   - zero mv
+         * [6]   - inter modes except for zero or split mv
+         * [7]   - split mv
+         * i16x16 modes never have any adjustment
+         */
+        int8_t mode[VP8_MVMODE_SPLIT + 1];
+
+        /**
+         * filter strength adjustment for macroblocks that reference:
+         * [0] - intra / VP56_FRAME_CURRENT
+         * [1] - VP56_FRAME_PREVIOUS
+         * [2] - VP56_FRAME_GOLDEN
+         * [3] - altref / VP56_FRAME_GOLDEN2
+         */
+        int8_t ref[4];
+    } lf_delta;
+
+    uint8_t (*top_border)[16 + 8 + 8];
+    uint8_t (*top_nnz)[9];
+
+    VP56RangeCoder c;   ///< header context, includes mb modes and motion vectors
+
+    /**
+     * These are all of the updatable probabilities for binary decisions.
+     * They are only implicitly reset on keyframes, making it quite likely
+     * for an interframe to desync if a prior frame's header was corrupt
+     * or missing outright!
+     * prob[0] is the active set; prob[1] keeps the saved copy restored
+     * after a frame whose updates are not meant to persist.
+     */
+    struct {
+        uint8_t segmentid[3];
+        uint8_t mbskip;
+        uint8_t intra;
+        uint8_t last;
+        uint8_t golden;
+        uint8_t pred16x16[4];
+        uint8_t pred8x8c[3];
+        uint8_t token[4][16][3][NUM_DCT_TOKENS - 1];
+        uint8_t mvc[2][19];
+        uint8_t scan[16];
+    } prob[2];
+
+    VP8Macroblock *macroblocks_base;
+    int invisible;
+    int update_last;    ///< update VP56_FRAME_PREVIOUS with the current one
+    int update_golden;  ///< VP56_FRAME_NONE if not updated, or which frame to copy if so
+    int update_altref;
+
+    /**
+     * If this flag is not set, all the probability updates
+     * are discarded after this frame is decoded.
+     */
+    int update_probabilities;
+
+    /**
+     * All coefficients are contained in separate arith coding contexts.
+     * There can be 1, 2, 4, or 8 of these after the header context.
+     */
+    int num_coeff_partitions;
+    VP56RangeCoder coeff_partition[8];
+    VideoDSPContext vdsp;
+    VP8DSPContext vp8dsp;
+    H264PredContext hpc;
+    vp8_mc_func put_pixels_tab[3][3][3];
+    VP8Frame frames[5];     /* 4 reference slots + the frame being decoded */
+
+    uint8_t colorspace;     ///< 0 is the only value allowed (meaning bt601)
+    uint8_t fullrange;      ///< whether we can skip clamping in dsp functions
+
+    int num_jobs;
+    /**
+     * This describes the macroblock memory layout.
+     * 0 -> Only width+height*2+1 macroblocks allocated (frame/single thread).
+     * 1 -> Macroblocks for entire frame allocated (sliced thread).
+     */
+    int mb_layout;
+
+    void (*decode_mb_row_no_filter)(AVCodecContext *avctx, void *tdata, int jobnr, int threadnr);
+    void (*filter_mb_row)(AVCodecContext *avctx, void *tdata, int jobnr, int threadnr);
+
+    int vp7;    ///< nonzero when decoding VP7 rather than VP8
+
+    /**
+     * Fade bit present in bitstream (VP7)
+     */
+    int fade_present;
+
+    /**
+     * Interframe DC prediction (VP7)
+     * [0] VP56_FRAME_PREVIOUS
+     * [1] VP56_FRAME_GOLDEN
+     */
+    uint16_t inter_dc_pred[2][2];
+
+    /**
+     * Macroblock features (VP7)
+     */
+    uint8_t feature_enabled[4];
+    uint8_t feature_present_prob[4];
+    uint8_t feature_index_prob[4][3];
+    uint8_t feature_value[4][4];
+} VP8Context;
+
+/* Decoder entry points exported to the VP7/VP8 codec tables (vp8.c). */
+int ff_vp8_decode_init(AVCodecContext *avctx);
+
+int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
+                        AVPacket *avpkt);
+
+int ff_vp8_decode_free(AVCodecContext *avctx);
+
+#endif /* AVCODEC_VP8_H */
diff --git a/media/ffvpx/libavcodec/vp8_parser.c b/media/ffvpx/libavcodec/vp8_parser.c
new file mode 100644
index 000000000..afc7f991e
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp8_parser.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2008 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "parser.h"
+
+/**
+ * Minimal VP8 parser callback: derive the picture type from the first
+ * frame-header byte and pass the packet through unchanged (no packet
+ * splitting or reassembly is performed).
+ */
+static int parse(AVCodecParserContext *s,
+                 AVCodecContext *avctx,
+                 const uint8_t **poutbuf, int *poutbuf_size,
+                 const uint8_t *buf, int buf_size)
+{
+    /* Bit 0 of the first byte of the VP8 frame tag signals an
+     * interframe; keyframes have it cleared.  Guard against empty
+     * packets before dereferencing buf (out-of-bounds read otherwise). */
+    if (buf_size > 0)
+        s->pict_type = (buf[0] & 0x01) ? AV_PICTURE_TYPE_P
+                                       : AV_PICTURE_TYPE_I;
+
+    *poutbuf      = buf;
+    *poutbuf_size = buf_size;
+    return buf_size;
+}
+
+/* Parser registration: only parser_parse is provided; the packet is
+ * never split, so no open/close hooks are needed. */
+AVCodecParser ff_vp8_parser = {
+    .codec_ids    = { AV_CODEC_ID_VP8 },
+    .parser_parse = parse,
+};
diff --git a/media/ffvpx/libavcodec/vp8data.h b/media/ffvpx/libavcodec/vp8data.h
new file mode 100644
index 000000000..5e6dea761
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp8data.h
@@ -0,0 +1,830 @@
+/*
+ * Copyright (C) 2010 David Conrad
+ * Copyright (C) 2010 Ronald S. Bultje
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * VP8 compatible video decoder
+ */
+
+#ifndef AVCODEC_VP8DATA_H
+#define AVCODEC_VP8DATA_H
+
+#include "vp8.h"
+#include "h264pred.h"
+
+/* Map a whole-MB (8x8-style) intra pred mode to the 4x4 mode applied
+ * to each luma subblock; VP7 folds everything but DC into TM. */
+static const uint8_t vp7_pred4x4_mode[] = {
+    [DC_PRED8x8]    = DC_PRED,
+    [VERT_PRED8x8]  = TM_VP8_PRED,
+    [HOR_PRED8x8]   = TM_VP8_PRED,
+    [PLANE_PRED8x8] = TM_VP8_PRED,
+};
+
+static const uint8_t vp8_pred4x4_mode[] = {
+    [DC_PRED8x8]    = DC_PRED,
+    [VERT_PRED8x8]  = VERT_PRED,
+    [HOR_PRED8x8]   = HOR_PRED,
+    [PLANE_PRED8x8] = TM_VP8_PRED,
+};
+
+/* Binary decoding trees for the 16x16 luma pred mode.  Negative entries
+ * are leaves (the negated mode); positive entries index the next node.
+ * The quoted bit strings give each leaf's code. */
+static const int8_t vp8_pred16x16_tree_intra[4][2] = {
+    { -MODE_I4x4, 1 },                      // '0'
+    { 2, 3 },
+    { -DC_PRED8x8, -VERT_PRED8x8 },         // '100', '101'
+    { -HOR_PRED8x8, -PLANE_PRED8x8 },       // '110', '111'
+};
+
+static const int8_t vp8_pred16x16_tree_inter[4][2] = {
+    { -DC_PRED8x8, 1 },                     // '0'
+    { 2, 3 },
+    { -VERT_PRED8x8, -HOR_PRED8x8 },        // '100', '101'
+    { -PLANE_PRED8x8, -MODE_I4x4 },         // '110', '111'
+};
+
+/* One candidate spatial MV predictor for VP7: yoffset/xoffset locate a
+ * neighbouring MB, subblock selects which of its bmv[] entries to read,
+ * and score is the weight given to the candidate (presumably summed per
+ * matching candidate — confirm against vp7's MV prediction code). */
+typedef struct VP7MVPred {
+    int8_t yoffset;
+    int8_t xoffset;
+    uint8_t subblock;
+    uint8_t score;
+} VP7MVPred;
+
+/* The 12 candidates, ordered from nearest/highest-weight outwards. */
+#define VP7_MV_PRED_COUNT 12
+static const VP7MVPred vp7_mv_pred[VP7_MV_PRED_COUNT] = {
+    { -1,  0, 12, 8 },
+    {  0, -1,  3, 8 },
+    { -1, -1, 15, 2 },
+    { -1,  1, 12, 2 },
+    { -2,  0, 12, 2 },
+    {  0, -2,  3, 2 },
+    { -1, -2, 15, 1 },
+    { -2, -1, 15, 1 },
+    { -2,  1, 12, 1 },
+    { -1,  2, 12, 1 },
+    { -2, -2, 15, 1 },
+    { -2,  2, 12, 1 },
+};
+
+/* VP7 inter-mode probability sets, one row per MV-prediction context
+ * (31 contexts; each row holds 4 branch probabilities). */
+static const int vp7_mode_contexts[31][4] = {
+    {   3,   3,   1, 246 },
+    {   7,  89,  66, 239 },
+    {  10,  90,  78, 238 },
+    {  14, 118,  95, 241 },
+    {  14, 123, 106, 238 },
+    {  20, 140, 109, 240 },
+    {  13, 155, 103, 238 },
+    {  21, 158,  99, 240 },
+    {  27,  82, 108, 232 },
+    {  19,  99, 123, 217 },
+    {  45, 139, 148, 236 },
+    {  50, 117, 144, 235 },
+    {  57, 128, 164, 238 },
+    {  69, 139, 171, 239 },
+    {  74, 154, 179, 238 },
+    { 112, 165, 186, 242 },
+    {  98, 143, 185, 245 },
+    { 105, 153, 190, 250 },
+    { 124, 167, 192, 245 },
+    { 131, 186, 203, 246 },
+    {  59, 184, 222, 224 },
+    { 148, 215, 214, 213 },
+    { 137, 211, 210, 219 },
+    { 190, 227, 128, 228 },
+    { 183, 228, 128, 228 },
+    { 194, 234, 128, 228 },
+    { 202, 236, 128, 228 },
+    { 205, 240, 128, 228 },
+    { 205, 244, 128, 228 },
+    { 225, 246, 128, 228 },
+    { 233, 251, 128, 228 },
+};
+
+/* VP8 equivalent of vp7_mode_contexts: 6 MV-prediction contexts. */
+static const int vp8_mode_contexts[6][4] = {
+    {   7,   1,   1, 143 },
+    {  14,  18,  14, 107 },
+    { 135,  64,  57,  68 },
+    {  60,  56, 128,  65 },
+    { 159, 134, 128,  34 },
+    { 234, 188, 128,  28 },
+};
+
+/* For each split mode (see enum inter_splitmvmode), the partition index
+ * of every 4x4 subblock in raster order; the extra all-zero row serves
+ * non-split modes. */
+static const uint8_t vp8_mbsplits[5][16] = {
+    { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 },
+    { 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 },
+    { 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3 },
+    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+};
+
+/* First 4x4 subblock index of each partition, per split mode. */
+static const uint8_t vp8_mbfirstidx[4][16] = {
+    { 0, 8 },
+    { 0, 2 },
+    { 0, 2, 8, 10 },
+    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }
+};
+
+/* Number of partitions per split mode. */
+static const uint8_t vp8_mbsplit_count[4] = {
+    2, 2, 4, 16
+};
+/* Probabilities for decoding the split-mode tree. */
+static const uint8_t vp8_mbsplit_prob[3] = {
+    110, 111, 150
+};
+
+/* Sub-MV mode probabilities: VP7 uses a single fixed set. */
+static const uint8_t vp7_submv_prob[3] = {
+    180, 162, 25
+};
+
+/* VP8 selects one of five sets based on neighbouring sub-MVs. */
+static const uint8_t vp8_submv_prob[5][3] = {
+    { 147, 136, 18 },
+    { 106, 145,  1 },
+    { 179, 121,  1 },
+    { 223,   1, 34 },
+    { 208,   1,  1 }
+};
+
+/* Probabilities for the 16x16 luma pred-mode trees above. */
+static const uint8_t vp8_pred16x16_prob_intra[4] = {
+    145, 156, 163, 128
+};
+static const uint8_t vp8_pred16x16_prob_inter[4] = {
+    112, 86, 140, 37
+};
+
+/* Decoding tree for the 4x4 luma intra pred mode (leaves negated, same
+ * convention as the 16x16 trees above). */
+static const int8_t vp8_pred4x4_tree[9][2] = {
+    { -DC_PRED, 1 },                                  // '0'
+    { -TM_VP8_PRED, 2 },                              // '10'
+    { -VERT_PRED, 3 },                                // '110'
+    { 4, 6 },
+    { -HOR_PRED, 5 },                                 // '11100'
+    { -DIAG_DOWN_RIGHT_PRED, -VERT_RIGHT_PRED },      // '111010', '111011'
+    { -DIAG_DOWN_LEFT_PRED, 7 },                      // '11110'
+    { -VERT_LEFT_PRED, 8 },                           // '111110'
+    { -HOR_DOWN_PRED, -HOR_UP_PRED },                 // '1111110', '1111111'
+};
+
+/* Decoding tree for the 8x8 chroma intra pred mode. */
+static const int8_t vp8_pred8x8c_tree[3][2] = {
+    { -DC_PRED8x8, 1 },                               // '0'
+    { -VERT_PRED8x8, 2 },                             // '10
+    { -HOR_PRED8x8, -PLANE_PRED8x8 },                 // '110', '111'
+};
+
+/* Probabilities for the chroma tree and (inter frames) the 4x4 tree. */
+static const uint8_t vp8_pred8x8c_prob_intra[3] = {
+    142, 114, 183
+};
+static const uint8_t vp8_pred8x8c_prob_inter[3] = {
+    162, 101, 204
+};
+static const uint8_t vp8_pred4x4_prob_inter[9] = {
+    120, 90, 79, 133, 87, 85, 80, 111, 151
+};
+
+static const uint8_t vp8_pred4x4_prob_intra[10][10][9] = {
+ {
+ { 39, 53, 200, 87, 26, 21, 43, 232, 171 },
+ { 56, 34, 51, 104, 114, 102, 29, 93, 77 },
+ { 88, 88, 147, 150, 42, 46, 45, 196, 205 },
+ { 107, 54, 32, 26, 51, 1, 81, 43, 31 },
+ { 39, 28, 85, 171, 58, 165, 90, 98, 64 },
+ { 34, 22, 116, 206, 23, 34, 43, 166, 73 },
+ { 34, 19, 21, 102, 132, 188, 16, 76, 124 },
+ { 68, 25, 106, 22, 64, 171, 36, 225, 114 },
+ { 62, 18, 78, 95, 85, 57, 50, 48, 51 },
+ { 43, 97, 183, 117, 85, 38, 35, 179, 61 },
+ },
+ {
+ { 112, 113, 77, 85, 179, 255, 38, 120, 114 },
+ { 40, 42, 1, 196, 245, 209, 10, 25, 109 },
+ { 193, 101, 35, 159, 215, 111, 89, 46, 111 },
+ { 100, 80, 8, 43, 154, 1, 51, 26, 71 },
+ { 88, 43, 29, 140, 166, 213, 37, 43, 154 },
+ { 61, 63, 30, 155, 67, 45, 68, 1, 209 },
+ { 41, 40, 5, 102, 211, 183, 4, 1, 221 },
+ { 142, 78, 78, 16, 255, 128, 34, 197, 171 },
+ { 51, 50, 17, 168, 209, 192, 23, 25, 82 },
+ { 60, 148, 31, 172, 219, 228, 21, 18, 111 },
+ },
+ {
+ { 175, 69, 143, 80, 85, 82, 72, 155, 103 },
+ { 56, 58, 10, 171, 218, 189, 17, 13, 152 },
+ { 231, 120, 48, 89, 115, 113, 120, 152, 112 },
+ { 144, 71, 10, 38, 171, 213, 144, 34, 26 },
+ { 114, 26, 17, 163, 44, 195, 21, 10, 173 },
+ { 121, 24, 80, 195, 26, 62, 44, 64, 85 },
+ { 63, 20, 8, 114, 114, 208, 12, 9, 226 },
+ { 170, 46, 55, 19, 136, 160, 33, 206, 71 },
+ { 81, 40, 11, 96, 182, 84, 29, 16, 36 },
+ { 152, 179, 64, 126, 170, 118, 46, 70, 95 },
+ },
+ {
+ { 75, 79, 123, 47, 51, 128, 81, 171, 1 },
+ { 57, 17, 5, 71, 102, 57, 53, 41, 49 },
+ { 125, 98, 42, 88, 104, 85, 117, 175, 82 },
+ { 115, 21, 2, 10, 102, 255, 166, 23, 6 },
+ { 38, 33, 13, 121, 57, 73, 26, 1, 85 },
+ { 41, 10, 67, 138, 77, 110, 90, 47, 114 },
+ { 57, 18, 10, 102, 102, 213, 34, 20, 43 },
+ { 101, 29, 16, 10, 85, 128, 101, 196, 26 },
+ { 117, 20, 15, 36, 163, 128, 68, 1, 26 },
+ { 95, 84, 53, 89, 128, 100, 113, 101, 45 },
+ },
+ {
+ { 63, 59, 90, 180, 59, 166, 93, 73, 154 },
+ { 40, 40, 21, 116, 143, 209, 34, 39, 175 },
+ { 138, 31, 36, 171, 27, 166, 38, 44, 229 },
+ { 57, 46, 22, 24, 128, 1, 54, 17, 37 },
+ { 47, 15, 16, 183, 34, 223, 49, 45, 183 },
+ { 46, 17, 33, 183, 6, 98, 15, 32, 183 },
+ { 40, 3, 9, 115, 51, 192, 18, 6, 223 },
+ { 65, 32, 73, 115, 28, 128, 23, 128, 205 },
+ { 87, 37, 9, 115, 59, 77, 64, 21, 47 },
+ { 67, 87, 58, 169, 82, 115, 26, 59, 179 },
+ },
+ {
+ { 54, 57, 112, 184, 5, 41, 38, 166, 213 },
+ { 30, 34, 26, 133, 152, 116, 10, 32, 134 },
+ { 104, 55, 44, 218, 9, 54, 53, 130, 226 },
+ { 75, 32, 12, 51, 192, 255, 160, 43, 51 },
+ { 39, 19, 53, 221, 26, 114, 32, 73, 255 },
+ { 31, 9, 65, 234, 2, 15, 1, 118, 73 },
+ { 56, 21, 23, 111, 59, 205, 45, 37, 192 },
+ { 88, 31, 35, 67, 102, 85, 55, 186, 85 },
+ { 55, 38, 70, 124, 73, 102, 1, 34, 98 },
+ { 64, 90, 70, 205, 40, 41, 23, 26, 57 },
+ },
+ {
+ { 86, 40, 64, 135, 148, 224, 45, 183, 128 },
+ { 22, 26, 17, 131, 240, 154, 14, 1, 209 },
+ { 164, 50, 31, 137, 154, 133, 25, 35, 218 },
+ { 83, 12, 13, 54, 192, 255, 68, 47, 28 },
+ { 45, 16, 21, 91, 64, 222, 7, 1, 197 },
+ { 56, 21, 39, 155, 60, 138, 23, 102, 213 },
+ { 18, 11, 7, 63, 144, 171, 4, 4, 246 },
+ { 85, 26, 85, 85, 128, 128, 32, 146, 171 },
+ { 35, 27, 10, 146, 174, 171, 12, 26, 128 },
+ { 51, 103, 44, 131, 131, 123, 31, 6, 158 },
+ },
+ {
+ { 68, 45, 128, 34, 1, 47, 11, 245, 171 },
+ { 62, 17, 19, 70, 146, 85, 55, 62, 70 },
+ { 102, 61, 71, 37, 34, 53, 31, 243, 192 },
+ { 75, 15, 9, 9, 64, 255, 184, 119, 16 },
+ { 37, 43, 37, 154, 100, 163, 85, 160, 1 },
+ { 63, 9, 92, 136, 28, 64, 32, 201, 85 },
+ { 56, 8, 17, 132, 137, 255, 55, 116, 128 },
+ { 86, 6, 28, 5, 64, 255, 25, 248, 1 },
+ { 58, 15, 20, 82, 135, 57, 26, 121, 40 },
+ { 69, 60, 71, 38, 73, 119, 28, 222, 37 },
+ },
+ {
+ { 101, 75, 128, 139, 118, 146, 116, 128, 85 },
+ { 56, 41, 15, 176, 236, 85, 37, 9, 62 },
+ { 190, 80, 35, 99, 180, 80, 126, 54, 45 },
+ { 146, 36, 19, 30, 171, 255, 97, 27, 20 },
+ { 71, 30, 17, 119, 118, 255, 17, 18, 138 },
+ { 101, 38, 60, 138, 55, 70, 43, 26, 142 },
+ { 32, 41, 20, 117, 151, 142, 20, 21, 163 },
+ { 138, 45, 61, 62, 219, 1, 81, 188, 64 },
+ { 112, 19, 12, 61, 195, 128, 48, 4, 24 },
+ { 85, 126, 47, 87, 176, 51, 41, 20, 32 },
+ },
+ {
+ { 66, 102, 167, 99, 74, 62, 40, 234, 128 },
+ { 41, 53, 9, 178, 241, 141, 26, 8, 107 },
+ { 134, 183, 89, 137, 98, 101, 106, 165, 148 },
+ { 104, 79, 12, 27, 217, 255, 87, 17, 7 },
+ { 74, 43, 26, 146, 73, 166, 49, 23, 157 },
+ { 65, 38, 105, 160, 51, 52, 31, 115, 128 },
+ { 47, 41, 14, 110, 182, 183, 21, 17, 194 },
+ { 87, 68, 71, 44, 114, 51, 15, 186, 23 },
+ { 66, 45, 25, 102, 197, 189, 23, 18, 22 },
+ { 72, 187, 100, 130, 157, 111, 32, 75, 80 },
+ },
+};
+
+/* Tree for decoding the 2-bit per-MB segment id. */
+static const int8_t vp8_segmentid_tree[][2] = {
+    {  1,  2 },
+    { -0, -1 }, // '00', '01'
+    { -2, -3 }, // '10', '11'
+};
+
+/* Coefficient position -> probability "band" used to index the token
+ * probability tables. */
+static const uint8_t vp8_coeff_band[16] = {
+    0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7
+};
+
+/* Inverse of vp8_coeff_band: mappings of bands to coefficient indexes.
+ * Each list is -1-terminated. */
+static const int8_t vp8_coeff_band_indexes[8][10] = {
+    { 0, -1 },
+    { 1, -1 },
+    { 2, -1 },
+    { 3, -1 },
+    { 5, -1 },
+    { 6, -1 },
+    { 4, 7, 8, 9, 10, 11, 12, 13, 14, -1 },
+    { 15, -1 }
+};
+
+/* Extra-bit probabilities for the DCT magnitude escape categories
+ * (DCT_CAT1..DCT_CAT6); each list is zero-terminated. */
+static const uint8_t vp8_dct_cat1_prob[] = {
+    159, 0
+};
+static const uint8_t vp8_dct_cat2_prob[] = {
+    165, 145, 0
+};
+static const uint8_t vp8_dct_cat3_prob[] = {
+    173, 148, 140, 0
+};
+static const uint8_t vp8_dct_cat4_prob[] = {
+    176, 155, 140, 135, 0
+};
+static const uint8_t vp8_dct_cat5_prob[] = {
+    180, 157, 141, 134, 130, 0
+};
+static const uint8_t vp8_dct_cat6_prob[] = {
+    254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0
+};
+
+// only used for cat3 and above; cat 1 and 2 are referenced directly
+const uint8_t *const ff_vp8_dct_cat_prob[] = {
+    vp8_dct_cat3_prob,
+    vp8_dct_cat4_prob,
+    vp8_dct_cat5_prob,
+    vp8_dct_cat6_prob,
+};
+
+static const uint8_t vp8_token_default_probs[4][8][3][NUM_DCT_TOKENS - 1] = {
+ {
+ {
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ },
+ {
+ { 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 },
+ { 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 },
+ { 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 },
+ },
+ {
+ { 1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128 },
+ { 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 },
+ { 78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 },
+ },
+ {
+ { 1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 },
+ { 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 },
+ { 77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 },
+ },
+ {
+ { 1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 },
+ { 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 },
+ { 37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 },
+ },
+ {
+ { 1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128 },
+ { 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 },
+ { 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 },
+ },
+ {
+ { 1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 },
+ { 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 },
+ { 80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 },
+ },
+ {
+ { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ },
+ },
+ {
+ {
+ { 198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62 },
+ { 131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1 },
+ { 68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128 },
+ },
+ {
+ { 1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 },
+ { 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 },
+ { 81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128 },
+ },
+ {
+ { 1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 },
+ { 99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 },
+ { 23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128 },
+ },
+ {
+ { 1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 },
+ { 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 },
+ { 44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 },
+ },
+ {
+ { 1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 },
+ { 94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128 },
+ { 22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 },
+ },
+ {
+ { 1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 },
+ { 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 },
+ { 35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128 },
+ },
+ {
+ { 1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 },
+ { 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 },
+ { 45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128 },
+ },
+ {
+ { 1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128 },
+ { 203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
+ { 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 },
+ },
+ },
+ {
+ {
+ { 253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128 },
+ { 175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128 },
+ { 73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 },
+ },
+ {
+ { 1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128 },
+ { 239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128 },
+ { 155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128 },
+ },
+ {
+ { 1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128 },
+ { 201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128 },
+ { 69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128 },
+ },
+ {
+ { 1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 },
+ { 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 },
+ { 141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
+ },
+ {
+ { 1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
+ { 190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128 },
+ { 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ },
+ {
+ { 1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ },
+ {
+ { 1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 },
+ { 213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128 },
+ { 55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ },
+ {
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+ },
+ },
+ {
+ {
+ { 202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255 },
+ { 126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128 },
+ { 61, 46, 138, 219, 151, 178, 240, 170, 255, 216, 128 },
+ },
+ {
+ { 1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 },
+ { 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 },
+ { 39, 77, 162, 232, 172, 180, 245, 178, 255, 255, 128 },
+ },
+ {
+ { 1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128 },
+ { 124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128 },
+ { 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 },
+ },
+ {
+ { 1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 },
+ { 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 },
+ { 28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 },
+ },
+ {
+ { 1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128 },
+ { 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 },
+ { 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 },
+ },
+ {
+ { 1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 },
+ { 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 },
+ { 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 },
+ },
+ {
+ { 1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 },
+ { 141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128 },
+ { 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 },
+ },
+ {
+ { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ { 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+ },
+ },
+};
+
+static const uint8_t vp8_token_update_probs[4][8][3][NUM_DCT_TOKENS - 1] = {
+ {
+ {
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ {
+ { 176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ {
+ { 255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ {
+ { 255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ {
+ { 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ {
+ { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ {
+ { 255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255 },
+ { 250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255 },
+ { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ {
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ },
+ {
+ {
+ { 217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255 },
+ { 234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255 },
+ },
+ {
+ { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ {
+ { 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ {
+ { 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ {
+ { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ {
+ { 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ {
+ { 255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ {
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ },
+ {
+ {
+ { 186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255 },
+ { 251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255 },
+ },
+ {
+ { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255 },
+ },
+ {
+ { 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ {
+ { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ {
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ {
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ {
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ {
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ },
+ {
+ {
+ { 248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255 },
+ { 248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ {
+ { 255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255 },
+ },
+ {
+ { 255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ {
+ { 255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ {
+ { 255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ {
+ { 255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ {
+ { 255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ {
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+ },
+ },
+};
+
+static const uint8_t vp8_dc_qlookup[VP8_MAX_QUANT + 1] = {
+ 4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 17,
+ 18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 25, 25, 26, 27, 28,
+ 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43,
+ 44, 45, 46, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
+ 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
+ 75, 76, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+ 91, 93, 95, 96, 98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
+ 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
+};
+
+static const uint16_t vp8_ac_qlookup[VP8_MAX_QUANT + 1] = {
+ 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
+ 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
+ 52, 53, 54, 55, 56, 57, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76,
+ 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108,
+ 110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
+ 155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
+ 213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
+};
+
+static const uint8_t vp8_mv_update_prob[2][19] = {
+ { 237,
+ 246,
+ 253, 253, 254, 254, 254, 254, 254,
+ 254, 254, 254, 254, 254, 250, 250, 252, /* VP8 only: */ 254, 254 },
+ { 231,
+ 243,
+ 245, 253, 254, 254, 254, 254, 254,
+ 254, 254, 254, 254, 254, 251, 251, 254, /* VP8 only: */ 254, 254 }
+};
+
+static const uint8_t vp7_mv_default_prob[2][17] = {
+ { 162,
+ 128,
+ 225, 146, 172, 147, 214, 39, 156,
+ 247, 210, 135, 68, 138, 220, 239, 246 },
+ { 164,
+ 128,
+ 204, 170, 119, 235, 140, 230, 228,
+ 244, 184, 201, 44, 173, 221, 239, 253 }
+};
+
+static const uint8_t vp8_mv_default_prob[2][19] = {
+ { 162,
+ 128,
+ 225, 146, 172, 147, 214, 39, 156,
+ 128, 129, 132, 75, 145, 178, 206, 239, 254, 254 },
+ { 164,
+ 128,
+ 204, 170, 119, 235, 140, 230, 228,
+ 128, 130, 130, 74, 148, 180, 203, 236, 254, 254 }
+};
+
+static const uint8_t vp7_feature_value_size[2][4] = {
+ { 7, 6, 0, 8 },
+ { 7, 6, 0, 5 },
+};
+
+static const int8_t vp7_feature_index_tree[4][2] =
+{
+ { 1, 2 },
+ { -0, -1 }, // '00', '01'
+ { -2, -3 }, // '10', '11'
+};
+
+static const uint16_t vp7_ydc_qlookup[] = {
+ 4, 4, 5, 6, 6, 7, 8, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 33, 34, 35, 36, 36, 37, 38, 39, 39, 40, 41,
+ 41, 42, 43, 43, 44, 45, 45, 46, 47, 48, 48, 49, 50, 51, 52,
+ 53, 53, 54, 56, 57, 58, 59, 60, 62, 63, 65, 66, 68, 70, 72,
+ 74, 76, 79, 81, 84, 87, 90, 93, 96, 100, 104, 108, 112, 116, 121,
+ 126, 131, 136, 142, 148, 154, 160, 167, 174, 182, 189, 198, 206, 215, 224,
+ 234, 244, 254, 265, 277, 288, 301, 313, 327, 340, 355, 370, 385, 401, 417,
+ 434, 452, 470, 489, 509, 529, 550, 572,
+};
+
+static const uint16_t vp7_yac_qlookup[] = {
+ 4, 4, 5, 5, 6, 6, 7, 8, 9, 10, 11, 12, 13, 15,
+ 16, 17, 19, 20, 22, 23, 25, 26, 28, 29, 31, 32, 34, 35,
+ 37, 38, 40, 41, 42, 44, 45, 46, 48, 49, 50, 51, 53, 54,
+ 55, 56, 57, 58, 59, 61, 62, 63, 64, 65, 67, 68, 69, 70,
+ 72, 73, 75, 76, 78, 80, 82, 84, 86, 88, 91, 93, 96, 99,
+ 102, 105, 109, 112, 116, 121, 125, 130, 135, 140, 146, 152, 158, 165,
+ 172, 180, 188, 196, 205, 214, 224, 234, 245, 256, 268, 281, 294, 308,
+ 322, 337, 353, 369, 386, 404, 423, 443, 463, 484, 506, 529, 553, 578,
+ 604, 631, 659, 688, 718, 749, 781, 814, 849, 885, 922, 960, 1000, 1041,
+ 1083, 1127,
+};
+
+static const uint16_t vp7_y2dc_qlookup[] = {
+ 7, 9, 11, 13, 15, 17, 19, 21, 23, 26, 28, 30, 33, 35,
+ 37, 39, 42, 44, 46, 48, 51, 53, 55, 57, 59, 61, 63, 65,
+ 67, 69, 70, 72, 74, 75, 77, 78, 80, 81, 83, 84, 85, 87,
+ 88, 89, 90, 92, 93, 94, 95, 96, 97, 99, 100, 101, 102, 104,
+ 105, 106, 108, 109, 111, 113, 114, 116, 118, 120, 123, 125, 128, 131,
+ 134, 137, 140, 144, 148, 152, 156, 161, 166, 171, 176, 182, 188, 195,
+ 202, 209, 217, 225, 234, 243, 253, 263, 274, 285, 297, 309, 322, 336,
+ 350, 365, 381, 397, 414, 432, 450, 470, 490, 511, 533, 556, 579, 604,
+ 630, 656, 684, 713, 742, 773, 805, 838, 873, 908, 945, 983, 1022, 1063,
+ 1105, 1148,
+};
+
+static const uint16_t vp7_y2ac_qlookup[] = {
+ 7, 9, 11, 13, 16, 18, 21, 24, 26, 29, 32, 35,
+ 38, 41, 43, 46, 49, 52, 55, 58, 61, 64, 66, 69,
+ 72, 74, 77, 79, 82, 84, 86, 88, 91, 93, 95, 97,
+ 98, 100, 102, 104, 105, 107, 109, 110, 112, 113, 115, 116,
+ 117, 119, 120, 122, 123, 125, 127, 128, 130, 132, 134, 136,
+ 138, 141, 143, 146, 149, 152, 155, 158, 162, 166, 171, 175,
+ 180, 185, 191, 197, 204, 210, 218, 226, 234, 243, 252, 262,
+ 273, 284, 295, 308, 321, 335, 350, 365, 381, 398, 416, 435,
+ 455, 476, 497, 520, 544, 569, 595, 622, 650, 680, 711, 743,
+ 776, 811, 848, 885, 925, 965, 1008, 1052, 1097, 1144, 1193, 1244,
+ 1297, 1351, 1407, 1466, 1526, 1588, 1652, 1719,
+};
+
+#endif /* AVCODEC_VP8DATA_H */
diff --git a/media/ffvpx/libavcodec/vp8dsp.c b/media/ffvpx/libavcodec/vp8dsp.c
new file mode 100644
index 000000000..07bea69c7
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp8dsp.c
@@ -0,0 +1,741 @@
+/*
+ * Copyright (C) 2010 David Conrad
+ * Copyright (C) 2010 Ronald S. Bultje
+ * Copyright (C) 2014 Peter Ross
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * VP8 compatible video decoder
+ */
+
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+
+#include "mathops.h"
+#include "vp8dsp.h"
+
+#define MK_IDCT_DC_ADD4_C(name) \
+static void name ## _idct_dc_add4uv_c(uint8_t *dst, int16_t block[4][16], \
+ ptrdiff_t stride) \
+{ \
+ name ## _idct_dc_add_c(dst + stride * 0 + 0, block[0], stride); \
+ name ## _idct_dc_add_c(dst + stride * 0 + 4, block[1], stride); \
+ name ## _idct_dc_add_c(dst + stride * 4 + 0, block[2], stride); \
+ name ## _idct_dc_add_c(dst + stride * 4 + 4, block[3], stride); \
+} \
+ \
+static void name ## _idct_dc_add4y_c(uint8_t *dst, int16_t block[4][16], \
+ ptrdiff_t stride) \
+{ \
+ name ## _idct_dc_add_c(dst + 0, block[0], stride); \
+ name ## _idct_dc_add_c(dst + 4, block[1], stride); \
+ name ## _idct_dc_add_c(dst + 8, block[2], stride); \
+ name ## _idct_dc_add_c(dst + 12, block[3], stride); \
+}
+
+#if CONFIG_VP7_DECODER
+static void vp7_luma_dc_wht_c(int16_t block[4][4][16], int16_t dc[16])
+{
+ int i, a1, b1, c1, d1;
+ int16_t tmp[16];
+
+ for (i = 0; i < 4; i++) {
+ a1 = (dc[i * 4 + 0] + dc[i * 4 + 2]) * 23170;
+ b1 = (dc[i * 4 + 0] - dc[i * 4 + 2]) * 23170;
+ c1 = dc[i * 4 + 1] * 12540 - dc[i * 4 + 3] * 30274;
+ d1 = dc[i * 4 + 1] * 30274 + dc[i * 4 + 3] * 12540;
+ tmp[i * 4 + 0] = (a1 + d1) >> 14;
+ tmp[i * 4 + 3] = (a1 - d1) >> 14;
+ tmp[i * 4 + 1] = (b1 + c1) >> 14;
+ tmp[i * 4 + 2] = (b1 - c1) >> 14;
+ }
+
+ for (i = 0; i < 4; i++) {
+ a1 = (tmp[i + 0] + tmp[i + 8]) * 23170;
+ b1 = (tmp[i + 0] - tmp[i + 8]) * 23170;
+ c1 = tmp[i + 4] * 12540 - tmp[i + 12] * 30274;
+ d1 = tmp[i + 4] * 30274 + tmp[i + 12] * 12540;
+ AV_ZERO64(dc + i * 4);
+ block[0][i][0] = (a1 + d1 + 0x20000) >> 18;
+ block[3][i][0] = (a1 - d1 + 0x20000) >> 18;
+ block[1][i][0] = (b1 + c1 + 0x20000) >> 18;
+ block[2][i][0] = (b1 - c1 + 0x20000) >> 18;
+ }
+}
+
+static void vp7_luma_dc_wht_dc_c(int16_t block[4][4][16], int16_t dc[16])
+{
+ int i, val = (23170 * (23170 * dc[0] >> 14) + 0x20000) >> 18;
+ dc[0] = 0;
+
+ for (i = 0; i < 4; i++) {
+ block[i][0][0] = val;
+ block[i][1][0] = val;
+ block[i][2][0] = val;
+ block[i][3][0] = val;
+ }
+}
+
+static void vp7_idct_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
+{
+ int i, a1, b1, c1, d1;
+ int16_t tmp[16];
+
+ for (i = 0; i < 4; i++) {
+ a1 = (block[i * 4 + 0] + block[i * 4 + 2]) * 23170;
+ b1 = (block[i * 4 + 0] - block[i * 4 + 2]) * 23170;
+ c1 = block[i * 4 + 1] * 12540 - block[i * 4 + 3] * 30274;
+ d1 = block[i * 4 + 1] * 30274 + block[i * 4 + 3] * 12540;
+ AV_ZERO64(block + i * 4);
+ tmp[i * 4 + 0] = (a1 + d1) >> 14;
+ tmp[i * 4 + 3] = (a1 - d1) >> 14;
+ tmp[i * 4 + 1] = (b1 + c1) >> 14;
+ tmp[i * 4 + 2] = (b1 - c1) >> 14;
+ }
+
+ for (i = 0; i < 4; i++) {
+ a1 = (tmp[i + 0] + tmp[i + 8]) * 23170;
+ b1 = (tmp[i + 0] - tmp[i + 8]) * 23170;
+ c1 = tmp[i + 4] * 12540 - tmp[i + 12] * 30274;
+ d1 = tmp[i + 4] * 30274 + tmp[i + 12] * 12540;
+ dst[0 * stride + i] = av_clip_uint8(dst[0 * stride + i] +
+ ((a1 + d1 + 0x20000) >> 18));
+ dst[3 * stride + i] = av_clip_uint8(dst[3 * stride + i] +
+ ((a1 - d1 + 0x20000) >> 18));
+ dst[1 * stride + i] = av_clip_uint8(dst[1 * stride + i] +
+ ((b1 + c1 + 0x20000) >> 18));
+ dst[2 * stride + i] = av_clip_uint8(dst[2 * stride + i] +
+ ((b1 - c1 + 0x20000) >> 18));
+ }
+}
+
+static void vp7_idct_dc_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
+{
+ int i, dc = (23170 * (23170 * block[0] >> 14) + 0x20000) >> 18;
+ block[0] = 0;
+
+ for (i = 0; i < 4; i++) {
+ dst[0] = av_clip_uint8(dst[0] + dc);
+ dst[1] = av_clip_uint8(dst[1] + dc);
+ dst[2] = av_clip_uint8(dst[2] + dc);
+ dst[3] = av_clip_uint8(dst[3] + dc);
+ dst += stride;
+ }
+}
+
+MK_IDCT_DC_ADD4_C(vp7)
+#endif /* CONFIG_VP7_DECODER */
+
+// TODO: Maybe add dequant
+#if CONFIG_VP8_DECODER
+static void vp8_luma_dc_wht_c(int16_t block[4][4][16], int16_t dc[16])
+{
+ int i, t0, t1, t2, t3;
+
+ for (i = 0; i < 4; i++) {
+ t0 = dc[0 * 4 + i] + dc[3 * 4 + i];
+ t1 = dc[1 * 4 + i] + dc[2 * 4 + i];
+ t2 = dc[1 * 4 + i] - dc[2 * 4 + i];
+ t3 = dc[0 * 4 + i] - dc[3 * 4 + i];
+
+ dc[0 * 4 + i] = t0 + t1;
+ dc[1 * 4 + i] = t3 + t2;
+ dc[2 * 4 + i] = t0 - t1;
+ dc[3 * 4 + i] = t3 - t2;
+ }
+
+ for (i = 0; i < 4; i++) {
+ t0 = dc[i * 4 + 0] + dc[i * 4 + 3] + 3; // rounding
+ t1 = dc[i * 4 + 1] + dc[i * 4 + 2];
+ t2 = dc[i * 4 + 1] - dc[i * 4 + 2];
+ t3 = dc[i * 4 + 0] - dc[i * 4 + 3] + 3; // rounding
+ AV_ZERO64(dc + i * 4);
+
+ block[i][0][0] = (t0 + t1) >> 3;
+ block[i][1][0] = (t3 + t2) >> 3;
+ block[i][2][0] = (t0 - t1) >> 3;
+ block[i][3][0] = (t3 - t2) >> 3;
+ }
+}
+
+static void vp8_luma_dc_wht_dc_c(int16_t block[4][4][16], int16_t dc[16])
+{
+ int i, val = (dc[0] + 3) >> 3;
+ dc[0] = 0;
+
+ for (i = 0; i < 4; i++) {
+ block[i][0][0] = val;
+ block[i][1][0] = val;
+ block[i][2][0] = val;
+ block[i][3][0] = val;
+ }
+}
+
+#define MUL_20091(a) ((((a) * 20091) >> 16) + (a))
+#define MUL_35468(a) (((a) * 35468) >> 16)
+
+static void vp8_idct_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
+{
+ int i, t0, t1, t2, t3;
+ int16_t tmp[16];
+
+ for (i = 0; i < 4; i++) {
+ t0 = block[0 * 4 + i] + block[2 * 4 + i];
+ t1 = block[0 * 4 + i] - block[2 * 4 + i];
+ t2 = MUL_35468(block[1 * 4 + i]) - MUL_20091(block[3 * 4 + i]);
+ t3 = MUL_20091(block[1 * 4 + i]) + MUL_35468(block[3 * 4 + i]);
+ block[0 * 4 + i] = 0;
+ block[1 * 4 + i] = 0;
+ block[2 * 4 + i] = 0;
+ block[3 * 4 + i] = 0;
+
+ tmp[i * 4 + 0] = t0 + t3;
+ tmp[i * 4 + 1] = t1 + t2;
+ tmp[i * 4 + 2] = t1 - t2;
+ tmp[i * 4 + 3] = t0 - t3;
+ }
+
+ for (i = 0; i < 4; i++) {
+ t0 = tmp[0 * 4 + i] + tmp[2 * 4 + i];
+ t1 = tmp[0 * 4 + i] - tmp[2 * 4 + i];
+ t2 = MUL_35468(tmp[1 * 4 + i]) - MUL_20091(tmp[3 * 4 + i]);
+ t3 = MUL_20091(tmp[1 * 4 + i]) + MUL_35468(tmp[3 * 4 + i]);
+
+ dst[0] = av_clip_uint8(dst[0] + ((t0 + t3 + 4) >> 3));
+ dst[1] = av_clip_uint8(dst[1] + ((t1 + t2 + 4) >> 3));
+ dst[2] = av_clip_uint8(dst[2] + ((t1 - t2 + 4) >> 3));
+ dst[3] = av_clip_uint8(dst[3] + ((t0 - t3 + 4) >> 3));
+ dst += stride;
+ }
+}
+
+static void vp8_idct_dc_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
+{
+ int i, dc = (block[0] + 4) >> 3;
+ block[0] = 0;
+
+ for (i = 0; i < 4; i++) {
+ dst[0] = av_clip_uint8(dst[0] + dc);
+ dst[1] = av_clip_uint8(dst[1] + dc);
+ dst[2] = av_clip_uint8(dst[2] + dc);
+ dst[3] = av_clip_uint8(dst[3] + dc);
+ dst += stride;
+ }
+}
+
+MK_IDCT_DC_ADD4_C(vp8)
+#endif /* CONFIG_VP8_DECODER */
+
+// because I like only having two parameters to pass functions...
+#define LOAD_PIXELS \
+ int av_unused p3 = p[-4 * stride]; \
+ int av_unused p2 = p[-3 * stride]; \
+ int av_unused p1 = p[-2 * stride]; \
+ int av_unused p0 = p[-1 * stride]; \
+ int av_unused q0 = p[ 0 * stride]; \
+ int av_unused q1 = p[ 1 * stride]; \
+ int av_unused q2 = p[ 2 * stride]; \
+ int av_unused q3 = p[ 3 * stride];
+
+#define clip_int8(n) (cm[(n) + 0x80] - 0x80)
+
+static av_always_inline void filter_common(uint8_t *p, ptrdiff_t stride,
+ int is4tap, int is_vp7)
+{
+ LOAD_PIXELS
+ int a, f1, f2;
+ const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
+
+ a = 3 * (q0 - p0);
+
+ if (is4tap)
+ a += clip_int8(p1 - q1);
+
+ a = clip_int8(a);
+
+ // We deviate from the spec here with c(a+3) >> 3
+ // since that's what libvpx does.
+ f1 = FFMIN(a + 4, 127) >> 3;
+
+ if (is_vp7)
+ f2 = f1 - ((a & 7) == 4);
+ else
+ f2 = FFMIN(a + 3, 127) >> 3;
+
+ // Despite what the spec says, we do need to clamp here to
+ // be bitexact with libvpx.
+ p[-1 * stride] = cm[p0 + f2];
+ p[ 0 * stride] = cm[q0 - f1];
+
+ // only used for _inner on blocks without high edge variance
+ if (!is4tap) {
+ a = (f1 + 1) >> 1;
+ p[-2 * stride] = cm[p1 + a];
+ p[ 1 * stride] = cm[q1 - a];
+ }
+}
+
+static av_always_inline void vp7_filter_common(uint8_t *p, ptrdiff_t stride,
+ int is4tap)
+{
+ filter_common(p, stride, is4tap, IS_VP7);
+}
+
+static av_always_inline void vp8_filter_common(uint8_t *p, ptrdiff_t stride,
+ int is4tap)
+{
+ filter_common(p, stride, is4tap, IS_VP8);
+}
+
+static av_always_inline int vp7_simple_limit(uint8_t *p, ptrdiff_t stride,
+ int flim)
+{
+ LOAD_PIXELS
+ return FFABS(p0 - q0) <= flim;
+}
+
+static av_always_inline int vp8_simple_limit(uint8_t *p, ptrdiff_t stride,
+ int flim)
+{
+ LOAD_PIXELS
+ return 2 * FFABS(p0 - q0) + (FFABS(p1 - q1) >> 1) <= flim;
+}
+
+/**
+ * E - limit at the macroblock edge
+ * I - limit for interior difference
+ */
+#define NORMAL_LIMIT(vpn) \
+static av_always_inline int vp ## vpn ## _normal_limit(uint8_t *p, \
+ ptrdiff_t stride, \
+ int E, int I) \
+{ \
+ LOAD_PIXELS \
+ return vp ## vpn ## _simple_limit(p, stride, E) && \
+ FFABS(p3 - p2) <= I && FFABS(p2 - p1) <= I && \
+ FFABS(p1 - p0) <= I && FFABS(q3 - q2) <= I && \
+ FFABS(q2 - q1) <= I && FFABS(q1 - q0) <= I; \
+}
+
+NORMAL_LIMIT(7)
+NORMAL_LIMIT(8)
+
+// high edge variance
+static av_always_inline int hev(uint8_t *p, ptrdiff_t stride, int thresh)
+{
+ LOAD_PIXELS
+ return FFABS(p1 - p0) > thresh || FFABS(q1 - q0) > thresh;
+}
+
+static av_always_inline void filter_mbedge(uint8_t *p, ptrdiff_t stride)
+{
+ int a0, a1, a2, w;
+ const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
+
+ LOAD_PIXELS
+
+ w = clip_int8(p1 - q1);
+ w = clip_int8(w + 3 * (q0 - p0));
+
+ a0 = (27 * w + 63) >> 7;
+ a1 = (18 * w + 63) >> 7;
+ a2 = (9 * w + 63) >> 7;
+
+ p[-3 * stride] = cm[p2 + a2];
+ p[-2 * stride] = cm[p1 + a1];
+ p[-1 * stride] = cm[p0 + a0];
+ p[ 0 * stride] = cm[q0 - a0];
+ p[ 1 * stride] = cm[q1 - a1];
+ p[ 2 * stride] = cm[q2 - a2];
+}
+
+#define LOOP_FILTER(vpn, dir, size, stridea, strideb, maybe_inline) \
+static maybe_inline \
+void vpn ## _ ## dir ## _loop_filter ## size ## _c(uint8_t *dst, \
+ ptrdiff_t stride, \
+ int flim_E, int flim_I, \
+ int hev_thresh) \
+{ \
+ int i; \
+ for (i = 0; i < size; i++) \
+ if (vpn ## _normal_limit(dst + i * stridea, strideb, \
+ flim_E, flim_I)) { \
+ if (hev(dst + i * stridea, strideb, hev_thresh)) \
+ vpn ## _filter_common(dst + i * stridea, strideb, 1); \
+ else \
+ filter_mbedge(dst + i * stridea, strideb); \
+ } \
+} \
+ \
+static maybe_inline \
+void vpn ## _ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst, \
+ ptrdiff_t stride, \
+ int flim_E, \
+ int flim_I, \
+ int hev_thresh) \
+{ \
+ int i; \
+ for (i = 0; i < size; i++) \
+ if (vpn ## _normal_limit(dst + i * stridea, strideb, \
+ flim_E, flim_I)) { \
+ int hv = hev(dst + i * stridea, strideb, hev_thresh); \
+ if (hv) \
+ vpn ## _filter_common(dst + i * stridea, strideb, 1); \
+ else \
+ vpn ## _filter_common(dst + i * stridea, strideb, 0); \
+ } \
+}
+
+#define UV_LOOP_FILTER(vpn, dir, stridea, strideb) \
+LOOP_FILTER(vpn, dir, 8, stridea, strideb, av_always_inline) \
+static void vpn ## _ ## dir ## _loop_filter8uv_c(uint8_t *dstU, \
+ uint8_t *dstV, \
+ ptrdiff_t stride, int fE, \
+ int fI, int hev_thresh) \
+{ \
+ vpn ## _ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh); \
+ vpn ## _ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh); \
+} \
+ \
+static void vpn ## _ ## dir ## _loop_filter8uv_inner_c(uint8_t *dstU, \
+ uint8_t *dstV, \
+ ptrdiff_t stride, \
+ int fE, int fI, \
+ int hev_thresh) \
+{ \
+ vpn ## _ ## dir ## _loop_filter8_inner_c(dstU, stride, fE, fI, \
+ hev_thresh); \
+ vpn ## _ ## dir ## _loop_filter8_inner_c(dstV, stride, fE, fI, \
+ hev_thresh); \
+}
+
+#define LOOP_FILTER_SIMPLE(vpn) \
+static void vpn ## _v_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride, \
+ int flim) \
+{ \
+ int i; \
+ for (i = 0; i < 16; i++) \
+ if (vpn ## _simple_limit(dst + i, stride, flim)) \
+ vpn ## _filter_common(dst + i, stride, 1); \
+} \
+ \
+static void vpn ## _h_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride, \
+ int flim) \
+{ \
+ int i; \
+ for (i = 0; i < 16; i++) \
+ if (vpn ## _simple_limit(dst + i * stride, 1, flim)) \
+ vpn ## _filter_common(dst + i * stride, 1, 1); \
+}
+
+#define LOOP_FILTERS(vpn) \
+ LOOP_FILTER(vpn, v, 16, 1, stride, ) \
+ LOOP_FILTER(vpn, h, 16, stride, 1, ) \
+ UV_LOOP_FILTER(vpn, v, 1, stride) \
+ UV_LOOP_FILTER(vpn, h, stride, 1) \
+ LOOP_FILTER_SIMPLE(vpn) \
+
+static const uint8_t subpel_filters[7][6] = {
+ { 0, 6, 123, 12, 1, 0 },
+ { 2, 11, 108, 36, 8, 1 },
+ { 0, 9, 93, 50, 6, 0 },
+ { 3, 16, 77, 77, 16, 3 },
+ { 0, 6, 50, 93, 9, 0 },
+ { 1, 8, 36, 108, 11, 2 },
+ { 0, 1, 12, 123, 6, 0 },
+};
+
+#define PUT_PIXELS(WIDTH) \
+static void put_vp8_pixels ## WIDTH ## _c(uint8_t *dst, ptrdiff_t dststride, \
+ uint8_t *src, ptrdiff_t srcstride, \
+ int h, int x, int y) \
+{ \
+ int i; \
+ for (i = 0; i < h; i++, dst += dststride, src += srcstride) \
+ memcpy(dst, src, WIDTH); \
+}
+
+PUT_PIXELS(16)
+PUT_PIXELS(8)
+PUT_PIXELS(4)
+
+#define FILTER_6TAP(src, F, stride) \
+ cm[(F[2] * src[x + 0 * stride] - F[1] * src[x - 1 * stride] + \
+ F[0] * src[x - 2 * stride] + F[3] * src[x + 1 * stride] - \
+ F[4] * src[x + 2 * stride] + F[5] * src[x + 3 * stride] + 64) >> 7]
+
+#define FILTER_4TAP(src, F, stride) \
+ cm[(F[2] * src[x + 0 * stride] - F[1] * src[x - 1 * stride] + \
+ F[3] * src[x + 1 * stride] - F[4] * src[x + 2 * stride] + 64) >> 7]
+
+#define VP8_EPEL_H(SIZE, TAPS) \
+static void put_vp8_epel ## SIZE ## _h ## TAPS ## _c(uint8_t *dst, \
+ ptrdiff_t dststride, \
+ uint8_t *src, \
+ ptrdiff_t srcstride, \
+ int h, int mx, int my) \
+{ \
+ const uint8_t *filter = subpel_filters[mx - 1]; \
+ const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \
+ int x, y; \
+ for (y = 0; y < h; y++) { \
+ for (x = 0; x < SIZE; x++) \
+ dst[x] = FILTER_ ## TAPS ## TAP(src, filter, 1); \
+ dst += dststride; \
+ src += srcstride; \
+ } \
+}
+
+#define VP8_EPEL_V(SIZE, TAPS) \
+static void put_vp8_epel ## SIZE ## _v ## TAPS ## _c(uint8_t *dst, \
+ ptrdiff_t dststride, \
+ uint8_t *src, \
+ ptrdiff_t srcstride, \
+ int h, int mx, int my) \
+{ \
+ const uint8_t *filter = subpel_filters[my - 1]; \
+ const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \
+ int x, y; \
+ for (y = 0; y < h; y++) { \
+ for (x = 0; x < SIZE; x++) \
+ dst[x] = FILTER_ ## TAPS ## TAP(src, filter, srcstride); \
+ dst += dststride; \
+ src += srcstride; \
+ } \
+}
+
+#define VP8_EPEL_HV(SIZE, HTAPS, VTAPS) \
+static void \
+put_vp8_epel ## SIZE ## _h ## HTAPS ## v ## VTAPS ## _c(uint8_t *dst, \
+ ptrdiff_t dststride, \
+ uint8_t *src, \
+ ptrdiff_t srcstride, \
+ int h, int mx, \
+ int my) \
+{ \
+ const uint8_t *filter = subpel_filters[mx - 1]; \
+ const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \
+ int x, y; \
+ uint8_t tmp_array[(2 * SIZE + VTAPS - 1) * SIZE]; \
+ uint8_t *tmp = tmp_array; \
+ src -= (2 - (VTAPS == 4)) * srcstride; \
+ \
+ for (y = 0; y < h + VTAPS - 1; y++) { \
+ for (x = 0; x < SIZE; x++) \
+ tmp[x] = FILTER_ ## HTAPS ## TAP(src, filter, 1); \
+ tmp += SIZE; \
+ src += srcstride; \
+ } \
+ tmp = tmp_array + (2 - (VTAPS == 4)) * SIZE; \
+ filter = subpel_filters[my - 1]; \
+ \
+ for (y = 0; y < h; y++) { \
+ for (x = 0; x < SIZE; x++) \
+ dst[x] = FILTER_ ## VTAPS ## TAP(tmp, filter, SIZE); \
+ dst += dststride; \
+ tmp += SIZE; \
+ } \
+}
+
+VP8_EPEL_H(16, 4)
+VP8_EPEL_H(8, 4)
+VP8_EPEL_H(4, 4)
+VP8_EPEL_H(16, 6)
+VP8_EPEL_H(8, 6)
+VP8_EPEL_H(4, 6)
+VP8_EPEL_V(16, 4)
+VP8_EPEL_V(8, 4)
+VP8_EPEL_V(4, 4)
+VP8_EPEL_V(16, 6)
+VP8_EPEL_V(8, 6)
+VP8_EPEL_V(4, 6)
+
+VP8_EPEL_HV(16, 4, 4)
+VP8_EPEL_HV(8, 4, 4)
+VP8_EPEL_HV(4, 4, 4)
+VP8_EPEL_HV(16, 4, 6)
+VP8_EPEL_HV(8, 4, 6)
+VP8_EPEL_HV(4, 4, 6)
+VP8_EPEL_HV(16, 6, 4)
+VP8_EPEL_HV(8, 6, 4)
+VP8_EPEL_HV(4, 6, 4)
+VP8_EPEL_HV(16, 6, 6)
+VP8_EPEL_HV(8, 6, 6)
+VP8_EPEL_HV(4, 6, 6)
+
+#define VP8_BILINEAR(SIZE) \
+static void put_vp8_bilinear ## SIZE ## _h_c(uint8_t *dst, ptrdiff_t dstride, \
+ uint8_t *src, ptrdiff_t sstride, \
+ int h, int mx, int my) \
+{ \
+ int a = 8 - mx, b = mx; \
+ int x, y; \
+ for (y = 0; y < h; y++) { \
+ for (x = 0; x < SIZE; x++) \
+ dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3; \
+ dst += dstride; \
+ src += sstride; \
+ } \
+} \
+ \
+static void put_vp8_bilinear ## SIZE ## _v_c(uint8_t *dst, ptrdiff_t dstride, \
+ uint8_t *src, ptrdiff_t sstride, \
+ int h, int mx, int my) \
+{ \
+ int c = 8 - my, d = my; \
+ int x, y; \
+ for (y = 0; y < h; y++) { \
+ for (x = 0; x < SIZE; x++) \
+ dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3; \
+ dst += dstride; \
+ src += sstride; \
+ } \
+} \
+ \
+static void put_vp8_bilinear ## SIZE ## _hv_c(uint8_t *dst, \
+ ptrdiff_t dstride, \
+ uint8_t *src, \
+ ptrdiff_t sstride, \
+ int h, int mx, int my) \
+{ \
+ int a = 8 - mx, b = mx; \
+ int c = 8 - my, d = my; \
+ int x, y; \
+ uint8_t tmp_array[(2 * SIZE + 1) * SIZE]; \
+ uint8_t *tmp = tmp_array; \
+ for (y = 0; y < h + 1; y++) { \
+ for (x = 0; x < SIZE; x++) \
+ tmp[x] = (a * src[x] + b * src[x + 1] + 4) >> 3; \
+ tmp += SIZE; \
+ src += sstride; \
+ } \
+ tmp = tmp_array; \
+ for (y = 0; y < h; y++) { \
+ for (x = 0; x < SIZE; x++) \
+ dst[x] = (c * tmp[x] + d * tmp[x + SIZE] + 4) >> 3; \
+ dst += dstride; \
+ tmp += SIZE; \
+ } \
+}
+
+VP8_BILINEAR(16)
+VP8_BILINEAR(8)
+VP8_BILINEAR(4)
+
+#define VP78_MC_FUNC(IDX, SIZE) \
+ dsp->put_vp8_epel_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \
+ dsp->put_vp8_epel_pixels_tab[IDX][0][1] = put_vp8_epel ## SIZE ## _h4_c; \
+ dsp->put_vp8_epel_pixels_tab[IDX][0][2] = put_vp8_epel ## SIZE ## _h6_c; \
+ dsp->put_vp8_epel_pixels_tab[IDX][1][0] = put_vp8_epel ## SIZE ## _v4_c; \
+ dsp->put_vp8_epel_pixels_tab[IDX][1][1] = put_vp8_epel ## SIZE ## _h4v4_c; \
+ dsp->put_vp8_epel_pixels_tab[IDX][1][2] = put_vp8_epel ## SIZE ## _h6v4_c; \
+ dsp->put_vp8_epel_pixels_tab[IDX][2][0] = put_vp8_epel ## SIZE ## _v6_c; \
+ dsp->put_vp8_epel_pixels_tab[IDX][2][1] = put_vp8_epel ## SIZE ## _h4v6_c; \
+ dsp->put_vp8_epel_pixels_tab[IDX][2][2] = put_vp8_epel ## SIZE ## _h6v6_c
+
+#define VP78_BILINEAR_MC_FUNC(IDX, SIZE) \
+ dsp->put_vp8_bilinear_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \
+ dsp->put_vp8_bilinear_pixels_tab[IDX][0][1] = put_vp8_bilinear ## SIZE ## _h_c; \
+ dsp->put_vp8_bilinear_pixels_tab[IDX][0][2] = put_vp8_bilinear ## SIZE ## _h_c; \
+ dsp->put_vp8_bilinear_pixels_tab[IDX][1][0] = put_vp8_bilinear ## SIZE ## _v_c; \
+ dsp->put_vp8_bilinear_pixels_tab[IDX][1][1] = put_vp8_bilinear ## SIZE ## _hv_c; \
+ dsp->put_vp8_bilinear_pixels_tab[IDX][1][2] = put_vp8_bilinear ## SIZE ## _hv_c; \
+ dsp->put_vp8_bilinear_pixels_tab[IDX][2][0] = put_vp8_bilinear ## SIZE ## _v_c; \
+ dsp->put_vp8_bilinear_pixels_tab[IDX][2][1] = put_vp8_bilinear ## SIZE ## _hv_c; \
+ dsp->put_vp8_bilinear_pixels_tab[IDX][2][2] = put_vp8_bilinear ## SIZE ## _hv_c
+
+av_cold void ff_vp78dsp_init(VP8DSPContext *dsp)
+{
+ VP78_MC_FUNC(0, 16);
+ VP78_MC_FUNC(1, 8);
+ VP78_MC_FUNC(2, 4);
+
+ VP78_BILINEAR_MC_FUNC(0, 16);
+ VP78_BILINEAR_MC_FUNC(1, 8);
+ VP78_BILINEAR_MC_FUNC(2, 4);
+
+ if (ARCH_ARM)
+ ff_vp78dsp_init_arm(dsp);
+ if (ARCH_PPC)
+ ff_vp78dsp_init_ppc(dsp);
+ if (ARCH_X86)
+ ff_vp78dsp_init_x86(dsp);
+}
+
+#if CONFIG_VP7_DECODER
+LOOP_FILTERS(vp7)
+
+av_cold void ff_vp7dsp_init(VP8DSPContext *dsp)
+{
+ dsp->vp8_luma_dc_wht = vp7_luma_dc_wht_c;
+ dsp->vp8_luma_dc_wht_dc = vp7_luma_dc_wht_dc_c;
+ dsp->vp8_idct_add = vp7_idct_add_c;
+ dsp->vp8_idct_dc_add = vp7_idct_dc_add_c;
+ dsp->vp8_idct_dc_add4y = vp7_idct_dc_add4y_c;
+ dsp->vp8_idct_dc_add4uv = vp7_idct_dc_add4uv_c;
+
+ dsp->vp8_v_loop_filter16y = vp7_v_loop_filter16_c;
+ dsp->vp8_h_loop_filter16y = vp7_h_loop_filter16_c;
+ dsp->vp8_v_loop_filter8uv = vp7_v_loop_filter8uv_c;
+ dsp->vp8_h_loop_filter8uv = vp7_h_loop_filter8uv_c;
+
+ dsp->vp8_v_loop_filter16y_inner = vp7_v_loop_filter16_inner_c;
+ dsp->vp8_h_loop_filter16y_inner = vp7_h_loop_filter16_inner_c;
+ dsp->vp8_v_loop_filter8uv_inner = vp7_v_loop_filter8uv_inner_c;
+ dsp->vp8_h_loop_filter8uv_inner = vp7_h_loop_filter8uv_inner_c;
+
+ dsp->vp8_v_loop_filter_simple = vp7_v_loop_filter_simple_c;
+ dsp->vp8_h_loop_filter_simple = vp7_h_loop_filter_simple_c;
+}
+#endif /* CONFIG_VP7_DECODER */
+
+#if CONFIG_VP8_DECODER
+LOOP_FILTERS(vp8)
+
+av_cold void ff_vp8dsp_init(VP8DSPContext *dsp)
+{
+ dsp->vp8_luma_dc_wht = vp8_luma_dc_wht_c;
+ dsp->vp8_luma_dc_wht_dc = vp8_luma_dc_wht_dc_c;
+ dsp->vp8_idct_add = vp8_idct_add_c;
+ dsp->vp8_idct_dc_add = vp8_idct_dc_add_c;
+ dsp->vp8_idct_dc_add4y = vp8_idct_dc_add4y_c;
+ dsp->vp8_idct_dc_add4uv = vp8_idct_dc_add4uv_c;
+
+ dsp->vp8_v_loop_filter16y = vp8_v_loop_filter16_c;
+ dsp->vp8_h_loop_filter16y = vp8_h_loop_filter16_c;
+ dsp->vp8_v_loop_filter8uv = vp8_v_loop_filter8uv_c;
+ dsp->vp8_h_loop_filter8uv = vp8_h_loop_filter8uv_c;
+
+ dsp->vp8_v_loop_filter16y_inner = vp8_v_loop_filter16_inner_c;
+ dsp->vp8_h_loop_filter16y_inner = vp8_h_loop_filter16_inner_c;
+ dsp->vp8_v_loop_filter8uv_inner = vp8_v_loop_filter8uv_inner_c;
+ dsp->vp8_h_loop_filter8uv_inner = vp8_h_loop_filter8uv_inner_c;
+
+ dsp->vp8_v_loop_filter_simple = vp8_v_loop_filter_simple_c;
+ dsp->vp8_h_loop_filter_simple = vp8_h_loop_filter_simple_c;
+
+ if (ARCH_ARM)
+ ff_vp8dsp_init_arm(dsp);
+ if (ARCH_X86)
+ ff_vp8dsp_init_x86(dsp);
+ if (ARCH_MIPS)
+ ff_vp8dsp_init_mips(dsp);
+}
+#endif /* CONFIG_VP8_DECODER */
diff --git a/media/ffvpx/libavcodec/vp8dsp.h b/media/ffvpx/libavcodec/vp8dsp.h
new file mode 100644
index 000000000..0401c9221
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp8dsp.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2010 David Conrad
+ * Copyright (C) 2010 Ronald S. Bultje
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * VP8 compatible video decoder
+ */
+
+#ifndef AVCODEC_VP8DSP_H
+#define AVCODEC_VP8DSP_H
+
+#include <stddef.h>
+#include <stdint.h>
+
// Motion-compensation function pointer: processes an h-line-tall block from
// src into dst; x and y select the subpel filter phase (0 = plain copy).
typedef void (*vp8_mc_func)(uint8_t *dst /* align 8 */, ptrdiff_t dstStride,
                            uint8_t *src /* align 1 */, ptrdiff_t srcStride,
                            int h, int x, int y);

// Dispatch table of DSP kernels shared by the VP7 and VP8 decoders; entries
// are filled with C fallbacks by ff_vp8dsp_init()/ff_vp7dsp_init() and may
// be replaced by architecture-specific implementations.
typedef struct VP8DSPContext {
    void (*vp8_luma_dc_wht)(int16_t block[4][4][16], int16_t dc[16]);
    void (*vp8_luma_dc_wht_dc)(int16_t block[4][4][16], int16_t dc[16]);
    void (*vp8_idct_add)(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
    void (*vp8_idct_dc_add)(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
    void (*vp8_idct_dc_add4y)(uint8_t *dst, int16_t block[4][16],
                              ptrdiff_t stride);
    void (*vp8_idct_dc_add4uv)(uint8_t *dst, int16_t block[4][16],
                               ptrdiff_t stride);

    // loop filter applied to edges between macroblocks
    void (*vp8_v_loop_filter16y)(uint8_t *dst, ptrdiff_t stride,
                                 int flim_E, int flim_I, int hev_thresh);
    void (*vp8_h_loop_filter16y)(uint8_t *dst, ptrdiff_t stride,
                                 int flim_E, int flim_I, int hev_thresh);
    void (*vp8_v_loop_filter8uv)(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride,
                                 int flim_E, int flim_I, int hev_thresh);
    void (*vp8_h_loop_filter8uv)(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride,
                                 int flim_E, int flim_I, int hev_thresh);

    // loop filter applied to inner macroblock edges
    void (*vp8_v_loop_filter16y_inner)(uint8_t *dst, ptrdiff_t stride,
                                       int flim_E, int flim_I, int hev_thresh);
    void (*vp8_h_loop_filter16y_inner)(uint8_t *dst, ptrdiff_t stride,
                                       int flim_E, int flim_I, int hev_thresh);
    void (*vp8_v_loop_filter8uv_inner)(uint8_t *dstU, uint8_t *dstV,
                                       ptrdiff_t stride,
                                       int flim_E, int flim_I, int hev_thresh);
    void (*vp8_h_loop_filter8uv_inner)(uint8_t *dstU, uint8_t *dstV,
                                       ptrdiff_t stride,
                                       int flim_E, int flim_I, int hev_thresh);

    void (*vp8_v_loop_filter_simple)(uint8_t *dst, ptrdiff_t stride, int flim);
    void (*vp8_h_loop_filter_simple)(uint8_t *dst, ptrdiff_t stride, int flim);

    /**
     * first dimension: width>>3, height is assumed equal to width
     * second dimension: 0 if no vertical interpolation is needed;
     *                   1 4-tap vertical interpolation filter (my & 1)
     *                   2 6-tap vertical interpolation filter (!(my & 1))
     * third dimension: same as second dimension, for horizontal interpolation
     * so something like put_vp8_epel_pixels_tab[width>>3][2*!!my-(my&1)][2*!!mx-(mx&1)](..., mx, my)
     */
    vp8_mc_func put_vp8_epel_pixels_tab[3][3][3];
    vp8_mc_func put_vp8_bilinear_pixels_tab[3][3][3];
} VP8DSPContext;
+
+void ff_put_vp8_pixels16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
+ int h, int x, int y);
+void ff_put_vp8_pixels8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
+ int h, int x, int y);
+void ff_put_vp8_pixels4_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
+ int h, int x, int y);
+
+void ff_vp7dsp_init(VP8DSPContext *c);
+
+void ff_vp78dsp_init(VP8DSPContext *c);
+void ff_vp78dsp_init_arm(VP8DSPContext *c);
+void ff_vp78dsp_init_ppc(VP8DSPContext *c);
+void ff_vp78dsp_init_x86(VP8DSPContext *c);
+
+void ff_vp8dsp_init(VP8DSPContext *c);
+void ff_vp8dsp_init_arm(VP8DSPContext *c);
+void ff_vp8dsp_init_x86(VP8DSPContext *c);
+void ff_vp8dsp_init_mips(VP8DSPContext *c);
+
+#define IS_VP7 1
+#define IS_VP8 0
+
+#endif /* AVCODEC_VP8DSP_H */
diff --git a/media/ffvpx/libavcodec/vp9.c b/media/ffvpx/libavcodec/vp9.c
new file mode 100644
index 000000000..3b721495d
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp9.c
@@ -0,0 +1,4365 @@
+/*
+ * VP9 compatible video decoder
+ *
+ * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+ * Copyright (C) 2013 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "get_bits.h"
+#include "internal.h"
+#include "profiles.h"
+#include "thread.h"
+#include "videodsp.h"
+#include "vp56.h"
+#include "vp9.h"
+#include "vp9data.h"
+#include "vp9dsp.h"
+#include "libavutil/avassert.h"
+#include "libavutil/pixdesc.h"
+
+#define VP9_SYNCCODE 0x498342
+
// Per-superblock loop-filter state: one filter strength per 8x8 block plus
// bitmasks recording which column/row edges need filtering, per transform
// size, for luma and chroma separately.
struct VP9Filter {
    uint8_t level[8 * 8];
    uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
                              [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
};
+
// Per-block mode decisions for the block currently being decoded: segment
// id, intra/inter choice, reference frames, motion vectors, interpolation
// filter, block size/partition and transform sizes.
typedef struct VP9Block {
    uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
    enum FilterMode filter;
    VP56mv mv[4 /* b_idx */][2 /* ref */];
    enum BlockSize bs;
    enum TxfmMode tx, uvtx;
    enum BlockLevel bl;
    enum BlockPartition bp;
} VP9Block;
+
// Full decoder state for one VP9 decoding thread. Contains the shared
// (cross-thread) state in 's', the DSP/bitstream helpers, per-frame header
// values, probability models and adaptation counters, and the left/above
// context caches used during block decoding.
typedef struct VP9Context {
    VP9SharedContext s;

    VP9DSPContext dsp;
    VideoDSPContext vdsp;
    GetBitContext gb;            // uncompressed header reader
    VP56RangeCoder c;            // range coder for the compressed header
    VP56RangeCoder *c_b;         // one range coder per tile column
    unsigned c_b_size;           // allocated size of c_b, for av_fast_realloc()
    VP9Block *b_base, *b;
    int pass;
    int row, row7, col, col7;    // current block position (full and modulo-8)
    uint8_t *dst[3];
    ptrdiff_t y_stride, uv_stride;

    uint8_t ss_h, ss_v;          // chroma subsampling shifts
    uint8_t last_bpp, bpp, bpp_index, bytesperpixel;
    uint8_t last_keyframe;
    // sb_cols/rows, rows/cols and last_fmt are used for allocating all internal
    // arrays, and are thus per-thread. w/h and gf_fmt are synced between threads
    // and are therefore per-stream. pix_fmt represents the value in the header
    // of the currently processed frame.
    int w, h;
    enum AVPixelFormat pix_fmt, last_fmt, gf_fmt;
    unsigned sb_cols, sb_rows, rows, cols;
    ThreadFrame next_refs[8];

    struct {
        // cached loop-filter limit LUTs, recomputed when sharpness changes
        uint8_t lim_lut[64];
        uint8_t mblim_lut[64];
    } filter_lut;
    unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
    struct {
        // the four saveable probability contexts (frame_context_idx)
        prob_context p;
        uint8_t coef[4][2][2][6][6][3];
    } prob_ctx[4];
    struct {
        // probabilities in use for the current frame
        prob_context p;
        uint8_t coef[4][2][2][6][6][11];
    } prob;
    struct {
        // symbol counts gathered while decoding, used for backward adaptation
        unsigned y_mode[4][10];
        unsigned uv_mode[10][10];
        unsigned filter[4][3];
        unsigned mv_mode[7][4];
        unsigned intra[4][2];
        unsigned comp[5][2];
        unsigned single_ref[5][2][2];
        unsigned comp_ref[5][2];
        unsigned tx32p[2][4];
        unsigned tx16p[2][3];
        unsigned tx8p[2][2];
        unsigned skip[3][2];
        unsigned mv_joint[4];
        struct {
            unsigned sign[2];
            unsigned classes[11];
            unsigned class0[2];
            unsigned bits[10][2];
            unsigned class0_fp[2][4];
            unsigned fp[4];
            unsigned class0_hp[2];
            unsigned hp[2];
        } mv_comp[2];
        unsigned partition[4][4][4];
        unsigned coef[4][2][2][6][6][3];
        unsigned eob[4][2][2][6][6][2];
    } counts;

    // contextual (left/above) cache
    DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
    DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
    DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
    DECLARE_ALIGNED(16, uint8_t, left_uv_nnz_ctx)[2][16];
    DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_segpred_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_intra_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
    // "above" context rows, all carved out of one allocation in update_size()
    uint8_t *above_partition_ctx;
    uint8_t *above_mode_ctx;
    // FIXME maybe merge some of the below in a flags field?
    uint8_t *above_y_nnz_ctx;
    uint8_t *above_uv_nnz_ctx[2];
    uint8_t *above_skip_ctx; // 1bit
    uint8_t *above_txfm_ctx; // 2bit
    uint8_t *above_segpred_ctx; // 1bit
    uint8_t *above_intra_ctx; // 1bit
    uint8_t *above_comp_ctx; // 1bit
    uint8_t *above_ref_ctx; // 2bit
    uint8_t *above_filter_ctx;
    VP56mv (*above_mv_ctx)[2];

    // whole-frame cache
    uint8_t *intra_pred_data[3];
    struct VP9Filter *lflvl;
    DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[135 * 144 * 2];

    // block reconstruction intermediates
    int block_alloc_using_2pass;
    int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
    uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
    struct { int x, y; } min_mv, max_mv;
    DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64 * 2];
    DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64 * 2];
    uint16_t mvscale[3][2];   // 14-bit MV scaling factors vs. each ref frame
    uint8_t mvstep[3][2];
} VP9Context;
+
// Block width/height per BlockSize. Judging by the values ({16,16} down to
// {1,1} for the first table, clamped to >= 1 in the second), index 0 appears
// to be in 4x4-pixel units and index 1 in 8x8-pixel units — TODO confirm
// against the callers.
static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
    {
        { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
        { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
    }, {
        { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
        { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
    }
};
+
// Drop all references held by a VP9Frame: the threaded frame buffer, the
// extradata buffer (which backs segmentation_map and mv), and any hwaccel
// private buffer. The raw pointers into the released buffers are cleared
// so later code cannot dereference freed memory.
static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
{
    ff_thread_release_buffer(ctx, &f->tf);
    av_buffer_unref(&f->extradata);
    av_buffer_unref(&f->hwaccel_priv_buf);
    f->segmentation_map = NULL;
    f->hwaccel_picture_private = NULL;
}
+
// Allocate the buffers for one VP9Frame: the picture itself (via the frame
// threading API), a zero-initialized extradata buffer holding the
// segmentation map (sz bytes) followed by the per-block mv/ref pairs, and,
// if a hwaccel is active, its per-frame private data.
// Returns 0 on success or AVERROR(ENOMEM), releasing partial allocations.
static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
{
    VP9Context *s = ctx->priv_data;
    int ret, sz;

    if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
        return ret;
    // one 8x8-block entry per position in the superblock grid
    sz = 64 * s->sb_cols * s->sb_rows;
    if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
        goto fail;
    }

    f->segmentation_map = f->extradata->data;
    f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);

    if (ctx->hwaccel) {
        const AVHWAccel *hwaccel = ctx->hwaccel;
        av_assert0(!f->hwaccel_picture_private);
        if (hwaccel->frame_priv_data_size) {
            f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
            if (!f->hwaccel_priv_buf)
                goto fail;
            f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
        }
    }

    return 0;

fail:
    // releases the already-acquired tf buffer and any partial extradata
    vp9_unref_frame(ctx, f);
    return AVERROR(ENOMEM);
}
+
// Make dst an additional reference to src: ref the threaded frame and the
// extradata/hwaccel buffers, and copy the derived pointers and flags.
// On failure everything already referenced by dst is released again.
// Returns 0, a negative error from ff_thread_ref_frame(), or AVERROR(ENOMEM).
static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
{
    int res;

    if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
        return res;
    } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
        goto fail;
    }

    // raw pointers alias into the shared extradata buffer
    dst->segmentation_map = src->segmentation_map;
    dst->mv = src->mv;
    dst->uses_2pass = src->uses_2pass;

    if (src->hwaccel_picture_private) {
        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
        if (!dst->hwaccel_priv_buf)
            goto fail;
        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
    }

    return 0;

fail:
    vp9_unref_frame(ctx, dst);
    return AVERROR(ENOMEM);
}
+
// (Re)configure the decoder for a frame of w x h pixels: negotiate the
// output pixel format (offering hwaccel formats where applicable), update
// per-stream dimensions, and (re)allocate the per-superblock-column "above"
// context arrays, intra prediction cache and loop-filter level storage.
// Returns 0 on success or a negative AVERROR code.
static int update_size(AVCodecContext *ctx, int w, int h)
{
#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + CONFIG_VP9_D3D11VA_HWACCEL + CONFIG_VP9_VAAPI_HWACCEL)
    enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
    VP9Context *s = ctx->priv_data;
    uint8_t *p;
    int bytesperpixel = s->bytesperpixel, res, cols, rows;

    av_assert0(w > 0 && h > 0);

    // renegotiate format/dimensions only when something actually changed
    if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) {
        if ((res = ff_set_dimensions(ctx, w, h)) < 0)
            return res;

        // hardware acceleration is only offered for 8-bit 4:2:0 streams
        if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
#if CONFIG_VP9_DXVA2_HWACCEL
            *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
#endif
#if CONFIG_VP9_D3D11VA_HWACCEL
            *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
#endif
#if CONFIG_VP9_VAAPI_HWACCEL
            *fmtp++ = AV_PIX_FMT_VAAPI;
#endif
        }

        *fmtp++ = s->pix_fmt;
        *fmtp = AV_PIX_FMT_NONE;

        res = ff_thread_get_format(ctx, pix_fmts);
        if (res < 0)
            return res;

        ctx->pix_fmt = res;
        s->gf_fmt = s->pix_fmt;
        s->w = w;
        s->h = h;
    }

    cols = (w + 7) >> 3;
    rows = (h + 7) >> 3;

    // existing context arrays remain valid if the 8x8-block geometry and
    // pixel format are unchanged
    if (s->intra_pred_data[0] && cols == s->cols && rows == s->rows && s->pix_fmt == s->last_fmt)
        return 0;

    s->last_fmt = s->pix_fmt;
    s->sb_cols = (w + 63) >> 6;
    s->sb_rows = (h + 63) >> 6;
    s->cols = (w + 7) >> 3;
    s->rows = (h + 7) >> 3;

    // carve every "above" array out of one allocation: each entry gets
    // (n) elements per 64x64 superblock column
#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
    av_freep(&s->intra_pred_data[0]);
    // FIXME we slightly over-allocate here for subsampled chroma, but a little
    // bit of padding shouldn't affect performance...
    p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
                                sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
    if (!p)
        return AVERROR(ENOMEM);
    assign(s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
    assign(s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
    assign(s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
    assign(s->above_y_nnz_ctx, uint8_t *, 16);
    assign(s->above_mode_ctx, uint8_t *, 16);
    assign(s->above_mv_ctx, VP56mv(*)[2], 16);
    assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
    assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
    assign(s->above_partition_ctx, uint8_t *, 8);
    assign(s->above_skip_ctx, uint8_t *, 8);
    assign(s->above_txfm_ctx, uint8_t *, 8);
    assign(s->above_segpred_ctx, uint8_t *, 8);
    assign(s->above_intra_ctx, uint8_t *, 8);
    assign(s->above_comp_ctx, uint8_t *, 8);
    assign(s->above_ref_ctx, uint8_t *, 8);
    assign(s->above_filter_ctx, uint8_t *, 8);
    assign(s->lflvl, struct VP9Filter *, 1);
#undef assign

    // these will be re-allocated a little later
    av_freep(&s->b_base);
    av_freep(&s->block_base);

    // DSP tables depend on bit depth; re-init only when it changed
    if (s->bpp != s->last_bpp) {
        ff_vp9dsp_init(&s->dsp, s->bpp, ctx->flags & AV_CODEC_FLAG_BITEXACT);
        ff_videodsp_init(&s->vdsp, s->bpp);
        s->last_bpp = s->bpp;
    }

    return 0;
}
+
// (Re)allocate the block-mode and coefficient/EOB scratch buffers. In
// 2-pass mode, storage covers the whole frame (per superblock); otherwise a
// single superblock's worth is enough. The uvblock/eob/uveob pointers are
// carved out of the single block_base allocation.
// Returns 0 on success or AVERROR(ENOMEM).
static int update_block_buffers(AVCodecContext *ctx)
{
    VP9Context *s = ctx->priv_data;
    int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;

    // nothing to do if buffers exist and the 2-pass mode didn't change
    if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->s.frames[CUR_FRAME].uses_2pass)
        return 0;

    av_free(s->b_base);
    av_free(s->block_base);
    // chroma sizes shrink with each subsampled dimension
    chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
    chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
    if (s->s.frames[CUR_FRAME].uses_2pass) {
        int sbs = s->sb_cols * s->sb_rows;

        s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
        s->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
                                    16 * 16 + 2 * chroma_eobs) * sbs);
        if (!s->b_base || !s->block_base)
            return AVERROR(ENOMEM);
        s->uvblock_base[0] = s->block_base + sbs * 64 * 64 * bytesperpixel;
        s->uvblock_base[1] = s->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
        s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
        s->uveob_base[0] = s->eob_base + 16 * 16 * sbs;
        s->uveob_base[1] = s->uveob_base[0] + chroma_eobs * sbs;
    } else {
        s->b_base = av_malloc(sizeof(VP9Block));
        s->block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
                                   16 * 16 + 2 * chroma_eobs);
        if (!s->b_base || !s->block_base)
            return AVERROR(ENOMEM);
        s->uvblock_base[0] = s->block_base + 64 * 64 * bytesperpixel;
        s->uvblock_base[1] = s->uvblock_base[0] + chroma_blocks * bytesperpixel;
        s->eob_base = (uint8_t *) (s->uvblock_base[1] + chroma_blocks * bytesperpixel);
        s->uveob_base[0] = s->eob_base + 16 * 16;
        s->uveob_base[1] = s->uveob_base[0] + chroma_eobs;
    }
    s->block_alloc_using_2pass = s->s.frames[CUR_FRAME].uses_2pass;

    return 0;
}
+
+// for some reason the sign bit is at the end, not the start, of a bit sequence
+static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
+{
+ int v = get_bits(gb, n);
+ return get_bits1(gb) ? -v : v;
+}
+
+static av_always_inline int inv_recenter_nonneg(int v, int m)
+{
+ return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
+}
+
// Decode a differentially-coded forward probability update from the range
// coder and apply it to the current probability p; both the input and the
// returned probability are in [1, 255].
static int update_prob(VP56RangeCoder *c, int p)
{
    static const int inv_map_table[255] = {
          7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
        189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
         10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
         25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
         40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
         55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
         70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
         86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
        116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
        131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
        146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
        161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
        207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
        222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
        237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
        252, 253, 253,
    };
    int d;

    /* This code is trying to do a differential probability update. For a
     * current probability A in the range [1, 255], the difference to a new
     * probability of any value can be expressed differentially as 1-A,255-A
     * where some part of this (absolute range) exists both in positive as
     * well as the negative part, whereas another part only exists in one
     * half. We're trying to code this shared part differentially, i.e.
     * times two where the value of the lowest bit specifies the sign, and
     * the single part is then coded on top of this. This absolute difference
     * then again has a value of [0,254], but a bigger value in this range
     * indicates that we're further away from the original value A, so we
     * can code this as a VLC code, since higher values are increasingly
     * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
     * updates vs. the 'fine, exact' updates further down the range, which
     * adds one extra dimension to this differential update model. */

    // VLC for the offset d: 4-bit, 4-bit, 5-bit classes, then an escape
    // class with one extra precision bit for values >= 65.
    if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 0;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 16;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 5) + 32;
    } else {
        d = vp8_rac_get_uint(c, 7);
        if (d >= 65)
            d = (d << 1) - 65 + vp8_rac_get(c);
        d += 64;
        av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
    }

    // recenter around p, mirroring for probabilities above the midpoint
    return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
                    255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
}
+
// Parse the color config from the uncompressed frame header: bit depth
// (profiles 2/3 only), colorspace, color range and chroma subsampling, and
// derive s->bpp/bytesperpixel/ss_h/ss_v and s->pix_fmt from them.
// Returns 0 on success or AVERROR_INVALIDDATA for reserved/forbidden combos.
static int read_colorspace_details(AVCodecContext *ctx)
{
    static const enum AVColorSpace colorspaces[8] = {
        AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
        AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
    };
    VP9Context *s = ctx->priv_data;
    int bits = ctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12

    s->bpp_index = bits;
    s->bpp = 8 + bits * 2;
    s->bytesperpixel = (7 + s->bpp) >> 3;
    ctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
    if (ctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
        static const enum AVPixelFormat pix_fmt_rgb[3] = {
            AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
        };
        // RGB implies no subsampling and full range
        s->ss_h = s->ss_v = 0;
        ctx->color_range = AVCOL_RANGE_JPEG;
        s->pix_fmt = pix_fmt_rgb[bits];
        if (ctx->profile & 1) {
            if (get_bits1(&s->gb)) {
                av_log(ctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
                return AVERROR_INVALIDDATA;
            }
        } else {
            av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
                   ctx->profile);
            return AVERROR_INVALIDDATA;
        }
    } else {
        // YUV: pixel format selected by bit depth and subsampling flags
        static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
            { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
              { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
            { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
              { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
            { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
              { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
        };
        ctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
        if (ctx->profile & 1) {
            // profiles 1/3 carry explicit subsampling bits
            s->ss_h = get_bits1(&s->gb);
            s->ss_v = get_bits1(&s->gb);
            s->pix_fmt = pix_fmt_for_ss[bits][s->ss_v][s->ss_h];
            if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
                av_log(ctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
                       ctx->profile);
                return AVERROR_INVALIDDATA;
            } else if (get_bits1(&s->gb)) {
                av_log(ctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
                       ctx->profile);
                return AVERROR_INVALIDDATA;
            }
        } else {
            // profiles 0/2 are always 4:2:0
            s->ss_h = s->ss_v = 1;
            s->pix_fmt = pix_fmt_for_ss[bits][1][1];
        }
    }

    return 0;
}
+
+static int decode_frame_header(AVCodecContext *ctx,
+ const uint8_t *data, int size, int *ref)
+{
+ VP9Context *s = ctx->priv_data;
+ int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
+ int last_invisible;
+ const uint8_t *data2;
+
+ /* general header */
+ if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
+ return res;
+ }
+ if (get_bits(&s->gb, 2) != 0x2) { // frame marker
+ av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
+ return AVERROR_INVALIDDATA;
+ }
+ ctx->profile = get_bits1(&s->gb);
+ ctx->profile |= get_bits1(&s->gb) << 1;
+ if (ctx->profile == 3) ctx->profile += get_bits1(&s->gb);
+ if (ctx->profile > 3) {
+ av_log(ctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", ctx->profile);
+ return AVERROR_INVALIDDATA;
+ }
+ s->s.h.profile = ctx->profile;
+ if (get_bits1(&s->gb)) {
+ *ref = get_bits(&s->gb, 3);
+ return 0;
+ }
+ s->last_keyframe = s->s.h.keyframe;
+ s->s.h.keyframe = !get_bits1(&s->gb);
+ last_invisible = s->s.h.invisible;
+ s->s.h.invisible = !get_bits1(&s->gb);
+ s->s.h.errorres = get_bits1(&s->gb);
+ s->s.h.use_last_frame_mvs = !s->s.h.errorres && !last_invisible;
+ if (s->s.h.keyframe) {
+ if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
+ av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
+ return AVERROR_INVALIDDATA;
+ }
+ if ((res = read_colorspace_details(ctx)) < 0)
+ return res;
+ // for profile 1, here follows the subsampling bits
+ s->s.h.refreshrefmask = 0xff;
+ w = get_bits(&s->gb, 16) + 1;
+ h = get_bits(&s->gb, 16) + 1;
+ if (get_bits1(&s->gb)) // display size
+ skip_bits(&s->gb, 32);
+ } else {
+ s->s.h.intraonly = s->s.h.invisible ? get_bits1(&s->gb) : 0;
+ s->s.h.resetctx = s->s.h.errorres ? 0 : get_bits(&s->gb, 2);
+ if (s->s.h.intraonly) {
+ if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
+ av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
+ return AVERROR_INVALIDDATA;
+ }
+ if (ctx->profile >= 1) {
+ if ((res = read_colorspace_details(ctx)) < 0)
+ return res;
+ } else {
+ s->ss_h = s->ss_v = 1;
+ s->bpp = 8;
+ s->bpp_index = 0;
+ s->bytesperpixel = 1;
+ s->pix_fmt = AV_PIX_FMT_YUV420P;
+ ctx->colorspace = AVCOL_SPC_BT470BG;
+ ctx->color_range = AVCOL_RANGE_JPEG;
+ }
+ s->s.h.refreshrefmask = get_bits(&s->gb, 8);
+ w = get_bits(&s->gb, 16) + 1;
+ h = get_bits(&s->gb, 16) + 1;
+ if (get_bits1(&s->gb)) // display size
+ skip_bits(&s->gb, 32);
+ } else {
+ s->s.h.refreshrefmask = get_bits(&s->gb, 8);
+ s->s.h.refidx[0] = get_bits(&s->gb, 3);
+ s->s.h.signbias[0] = get_bits1(&s->gb) && !s->s.h.errorres;
+ s->s.h.refidx[1] = get_bits(&s->gb, 3);
+ s->s.h.signbias[1] = get_bits1(&s->gb) && !s->s.h.errorres;
+ s->s.h.refidx[2] = get_bits(&s->gb, 3);
+ s->s.h.signbias[2] = get_bits1(&s->gb) && !s->s.h.errorres;
+ if (!s->s.refs[s->s.h.refidx[0]].f->buf[0] ||
+ !s->s.refs[s->s.h.refidx[1]].f->buf[0] ||
+ !s->s.refs[s->s.h.refidx[2]].f->buf[0]) {
+ av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
+ return AVERROR_INVALIDDATA;
+ }
+ if (get_bits1(&s->gb)) {
+ w = s->s.refs[s->s.h.refidx[0]].f->width;
+ h = s->s.refs[s->s.h.refidx[0]].f->height;
+ } else if (get_bits1(&s->gb)) {
+ w = s->s.refs[s->s.h.refidx[1]].f->width;
+ h = s->s.refs[s->s.h.refidx[1]].f->height;
+ } else if (get_bits1(&s->gb)) {
+ w = s->s.refs[s->s.h.refidx[2]].f->width;
+ h = s->s.refs[s->s.h.refidx[2]].f->height;
+ } else {
+ w = get_bits(&s->gb, 16) + 1;
+ h = get_bits(&s->gb, 16) + 1;
+ }
+ // Note that in this code, "CUR_FRAME" is actually before we
+ // have formally allocated a frame, and thus actually represents
+ // the _last_ frame
+ s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f->width == w &&
+ s->s.frames[CUR_FRAME].tf.f->height == h;
+ if (get_bits1(&s->gb)) // display size
+ skip_bits(&s->gb, 32);
+ s->s.h.highprecisionmvs = get_bits1(&s->gb);
+ s->s.h.filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
+ get_bits(&s->gb, 2);
+ s->s.h.allowcompinter = s->s.h.signbias[0] != s->s.h.signbias[1] ||
+ s->s.h.signbias[0] != s->s.h.signbias[2];
+ if (s->s.h.allowcompinter) {
+ if (s->s.h.signbias[0] == s->s.h.signbias[1]) {
+ s->s.h.fixcompref = 2;
+ s->s.h.varcompref[0] = 0;
+ s->s.h.varcompref[1] = 1;
+ } else if (s->s.h.signbias[0] == s->s.h.signbias[2]) {
+ s->s.h.fixcompref = 1;
+ s->s.h.varcompref[0] = 0;
+ s->s.h.varcompref[1] = 2;
+ } else {
+ s->s.h.fixcompref = 0;
+ s->s.h.varcompref[0] = 1;
+ s->s.h.varcompref[1] = 2;
+ }
+ }
+ }
+ }
+ s->s.h.refreshctx = s->s.h.errorres ? 0 : get_bits1(&s->gb);
+ s->s.h.parallelmode = s->s.h.errorres ? 1 : get_bits1(&s->gb);
+ s->s.h.framectxid = c = get_bits(&s->gb, 2);
+ if (s->s.h.keyframe || s->s.h.intraonly)
+ s->s.h.framectxid = 0; // BUG: libvpx ignores this field in keyframes
+
+ /* loopfilter header data */
+ if (s->s.h.keyframe || s->s.h.errorres || s->s.h.intraonly) {
+ // reset loopfilter defaults
+ s->s.h.lf_delta.ref[0] = 1;
+ s->s.h.lf_delta.ref[1] = 0;
+ s->s.h.lf_delta.ref[2] = -1;
+ s->s.h.lf_delta.ref[3] = -1;
+ s->s.h.lf_delta.mode[0] = 0;
+ s->s.h.lf_delta.mode[1] = 0;
+ memset(s->s.h.segmentation.feat, 0, sizeof(s->s.h.segmentation.feat));
+ }
+ s->s.h.filter.level = get_bits(&s->gb, 6);
+ sharp = get_bits(&s->gb, 3);
+ // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
+ // the old cache values since they are still valid
+ if (s->s.h.filter.sharpness != sharp)
+ memset(s->filter_lut.lim_lut, 0, sizeof(s->filter_lut.lim_lut));
+ s->s.h.filter.sharpness = sharp;
+ if ((s->s.h.lf_delta.enabled = get_bits1(&s->gb))) {
+ if ((s->s.h.lf_delta.updated = get_bits1(&s->gb))) {
+ for (i = 0; i < 4; i++)
+ if (get_bits1(&s->gb))
+ s->s.h.lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
+ for (i = 0; i < 2; i++)
+ if (get_bits1(&s->gb))
+ s->s.h.lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
+ }
+ }
+
+ /* quantization header data */
+ s->s.h.yac_qi = get_bits(&s->gb, 8);
+ s->s.h.ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
+ s->s.h.uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
+ s->s.h.uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
+ s->s.h.lossless = s->s.h.yac_qi == 0 && s->s.h.ydc_qdelta == 0 &&
+ s->s.h.uvdc_qdelta == 0 && s->s.h.uvac_qdelta == 0;
+ if (s->s.h.lossless)
+ ctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
+
+ /* segmentation header info */
+ if ((s->s.h.segmentation.enabled = get_bits1(&s->gb))) {
+ if ((s->s.h.segmentation.update_map = get_bits1(&s->gb))) {
+ for (i = 0; i < 7; i++)
+ s->s.h.segmentation.prob[i] = get_bits1(&s->gb) ?
+ get_bits(&s->gb, 8) : 255;
+ if ((s->s.h.segmentation.temporal = get_bits1(&s->gb))) {
+ for (i = 0; i < 3; i++)
+ s->s.h.segmentation.pred_prob[i] = get_bits1(&s->gb) ?
+ get_bits(&s->gb, 8) : 255;
+ }
+ }
+
+ if (get_bits1(&s->gb)) {
+ s->s.h.segmentation.absolute_vals = get_bits1(&s->gb);
+ for (i = 0; i < 8; i++) {
+ if ((s->s.h.segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
+ s->s.h.segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
+ if ((s->s.h.segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
+ s->s.h.segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
+ if ((s->s.h.segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
+ s->s.h.segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
+ s->s.h.segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
+ }
+ }
+ }
+
+ // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
+ for (i = 0; i < (s->s.h.segmentation.enabled ? 8 : 1); i++) {
+ int qyac, qydc, quvac, quvdc, lflvl, sh;
+
+ if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].q_enabled) {
+ if (s->s.h.segmentation.absolute_vals)
+ qyac = av_clip_uintp2(s->s.h.segmentation.feat[i].q_val, 8);
+ else
+ qyac = av_clip_uintp2(s->s.h.yac_qi + s->s.h.segmentation.feat[i].q_val, 8);
+ } else {
+ qyac = s->s.h.yac_qi;
+ }
+ qydc = av_clip_uintp2(qyac + s->s.h.ydc_qdelta, 8);
+ quvdc = av_clip_uintp2(qyac + s->s.h.uvdc_qdelta, 8);
+ quvac = av_clip_uintp2(qyac + s->s.h.uvac_qdelta, 8);
+ qyac = av_clip_uintp2(qyac, 8);
+
+ s->s.h.segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[s->bpp_index][qydc];
+ s->s.h.segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[s->bpp_index][qyac];
+ s->s.h.segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[s->bpp_index][quvdc];
+ s->s.h.segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[s->bpp_index][quvac];
+
+ sh = s->s.h.filter.level >= 32;
+ if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].lf_enabled) {
+ if (s->s.h.segmentation.absolute_vals)
+ lflvl = av_clip_uintp2(s->s.h.segmentation.feat[i].lf_val, 6);
+ else
+ lflvl = av_clip_uintp2(s->s.h.filter.level + s->s.h.segmentation.feat[i].lf_val, 6);
+ } else {
+ lflvl = s->s.h.filter.level;
+ }
+ if (s->s.h.lf_delta.enabled) {
+ s->s.h.segmentation.feat[i].lflvl[0][0] =
+ s->s.h.segmentation.feat[i].lflvl[0][1] =
+ av_clip_uintp2(lflvl + (s->s.h.lf_delta.ref[0] * (1 << sh)), 6);
+ for (j = 1; j < 4; j++) {
+ s->s.h.segmentation.feat[i].lflvl[j][0] =
+ av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
+ s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
+ s->s.h.segmentation.feat[i].lflvl[j][1] =
+ av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
+ s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
+ }
+ } else {
+ memset(s->s.h.segmentation.feat[i].lflvl, lflvl,
+ sizeof(s->s.h.segmentation.feat[i].lflvl));
+ }
+ }
+
+ /* tiling info */
+ if ((res = update_size(ctx, w, h)) < 0) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n",
+ w, h, s->pix_fmt);
+ return res;
+ }
+ for (s->s.h.tiling.log2_tile_cols = 0;
+ s->sb_cols > (64 << s->s.h.tiling.log2_tile_cols);
+ s->s.h.tiling.log2_tile_cols++) ;
+ for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
+ max = FFMAX(0, max - 1);
+ while (max > s->s.h.tiling.log2_tile_cols) {
+ if (get_bits1(&s->gb))
+ s->s.h.tiling.log2_tile_cols++;
+ else
+ break;
+ }
+ s->s.h.tiling.log2_tile_rows = decode012(&s->gb);
+ s->s.h.tiling.tile_rows = 1 << s->s.h.tiling.log2_tile_rows;
+ if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols)) {
+ s->s.h.tiling.tile_cols = 1 << s->s.h.tiling.log2_tile_cols;
+ s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
+ sizeof(VP56RangeCoder) * s->s.h.tiling.tile_cols);
+ if (!s->c_b) {
+ av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
+ return AVERROR(ENOMEM);
+ }
+ }
+
+ /* check reference frames */
+ if (!s->s.h.keyframe && !s->s.h.intraonly) {
+ for (i = 0; i < 3; i++) {
+ AVFrame *ref = s->s.refs[s->s.h.refidx[i]].f;
+ int refw = ref->width, refh = ref->height;
+
+ if (ref->format != ctx->pix_fmt) {
+ av_log(ctx, AV_LOG_ERROR,
+ "Ref pixfmt (%s) did not match current frame (%s)",
+ av_get_pix_fmt_name(ref->format),
+ av_get_pix_fmt_name(ctx->pix_fmt));
+ return AVERROR_INVALIDDATA;
+ } else if (refw == w && refh == h) {
+ s->mvscale[i][0] = s->mvscale[i][1] = 0;
+ } else {
+ if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
+ av_log(ctx, AV_LOG_ERROR,
+ "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
+ refw, refh, w, h);
+ return AVERROR_INVALIDDATA;
+ }
+ s->mvscale[i][0] = (refw << 14) / w;
+ s->mvscale[i][1] = (refh << 14) / h;
+ s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
+ s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
+ }
+ }
+ }
+
+ if (s->s.h.keyframe || s->s.h.errorres || (s->s.h.intraonly && s->s.h.resetctx == 3)) {
+ s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
+ s->prob_ctx[3].p = vp9_default_probs;
+ memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
+ sizeof(vp9_default_coef_probs));
+ memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
+ sizeof(vp9_default_coef_probs));
+ memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
+ sizeof(vp9_default_coef_probs));
+ memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
+ sizeof(vp9_default_coef_probs));
+ } else if (s->s.h.intraonly && s->s.h.resetctx == 2) {
+ s->prob_ctx[c].p = vp9_default_probs;
+ memcpy(s->prob_ctx[c].coef, vp9_default_coef_probs,
+ sizeof(vp9_default_coef_probs));
+ }
+
+ // next 16 bits is size of the rest of the header (arith-coded)
+ s->s.h.compressed_header_size = size2 = get_bits(&s->gb, 16);
+ s->s.h.uncompressed_header_size = (get_bits_count(&s->gb) + 7) / 8;
+
+ data2 = align_get_bits(&s->gb);
+ if (size2 > size - (data2 - data)) {
+ av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
+ return AVERROR_INVALIDDATA;
+ }
+ ff_vp56_init_range_decoder(&s->c, data2, size2);
+ if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
+ av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
+ return AVERROR_INVALIDDATA;
+ }
+
+ if (s->s.h.keyframe || s->s.h.intraonly) {
+ memset(s->counts.coef, 0, sizeof(s->counts.coef));
+ memset(s->counts.eob, 0, sizeof(s->counts.eob));
+ } else {
+ memset(&s->counts, 0, sizeof(s->counts));
+ }
+ // FIXME is it faster to not copy here, but do it down in the fw updates
+ // as explicit copies if the fw update is missing (and skip the copy upon
+ // fw update)?
+ s->prob.p = s->prob_ctx[c].p;
+
+ // txfm updates
+ if (s->s.h.lossless) {
+ s->s.h.txfmmode = TX_4X4;
+ } else {
+ s->s.h.txfmmode = vp8_rac_get_uint(&s->c, 2);
+ if (s->s.h.txfmmode == 3)
+ s->s.h.txfmmode += vp8_rac_get(&s->c);
+
+ if (s->s.h.txfmmode == TX_SWITCHABLE) {
+ for (i = 0; i < 2; i++)
+ if (vp56_rac_get_prob_branchy(&s->c, 252))
+ s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
+ for (i = 0; i < 2; i++)
+ for (j = 0; j < 2; j++)
+ if (vp56_rac_get_prob_branchy(&s->c, 252))
+ s->prob.p.tx16p[i][j] =
+ update_prob(&s->c, s->prob.p.tx16p[i][j]);
+ for (i = 0; i < 2; i++)
+ for (j = 0; j < 3; j++)
+ if (vp56_rac_get_prob_branchy(&s->c, 252))
+ s->prob.p.tx32p[i][j] =
+ update_prob(&s->c, s->prob.p.tx32p[i][j]);
+ }
+ }
+
+ // coef updates
+ for (i = 0; i < 4; i++) {
+ uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
+ if (vp8_rac_get(&s->c)) {
+ for (j = 0; j < 2; j++)
+ for (k = 0; k < 2; k++)
+ for (l = 0; l < 6; l++)
+ for (m = 0; m < 6; m++) {
+ uint8_t *p = s->prob.coef[i][j][k][l][m];
+ uint8_t *r = ref[j][k][l][m];
+ if (m >= 3 && l == 0) // dc only has 3 pt
+ break;
+ for (n = 0; n < 3; n++) {
+ if (vp56_rac_get_prob_branchy(&s->c, 252)) {
+ p[n] = update_prob(&s->c, r[n]);
+ } else {
+ p[n] = r[n];
+ }
+ }
+ p[3] = 0;
+ }
+ } else {
+ for (j = 0; j < 2; j++)
+ for (k = 0; k < 2; k++)
+ for (l = 0; l < 6; l++)
+ for (m = 0; m < 6; m++) {
+ uint8_t *p = s->prob.coef[i][j][k][l][m];
+ uint8_t *r = ref[j][k][l][m];
+ if (m > 3 && l == 0) // dc only has 3 pt
+ break;
+ memcpy(p, r, 3);
+ p[3] = 0;
+ }
+ }
+ if (s->s.h.txfmmode == i)
+ break;
+ }
+
+ // mode updates
+ for (i = 0; i < 3; i++)
+ if (vp56_rac_get_prob_branchy(&s->c, 252))
+ s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
+ if (!s->s.h.keyframe && !s->s.h.intraonly) {
+ for (i = 0; i < 7; i++)
+ for (j = 0; j < 3; j++)
+ if (vp56_rac_get_prob_branchy(&s->c, 252))
+ s->prob.p.mv_mode[i][j] =
+ update_prob(&s->c, s->prob.p.mv_mode[i][j]);
+
+ if (s->s.h.filtermode == FILTER_SWITCHABLE)
+ for (i = 0; i < 4; i++)
+ for (j = 0; j < 2; j++)
+ if (vp56_rac_get_prob_branchy(&s->c, 252))
+ s->prob.p.filter[i][j] =
+ update_prob(&s->c, s->prob.p.filter[i][j]);
+
+ for (i = 0; i < 4; i++)
+ if (vp56_rac_get_prob_branchy(&s->c, 252))
+ s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
+
+ if (s->s.h.allowcompinter) {
+ s->s.h.comppredmode = vp8_rac_get(&s->c);
+ if (s->s.h.comppredmode)
+ s->s.h.comppredmode += vp8_rac_get(&s->c);
+ if (s->s.h.comppredmode == PRED_SWITCHABLE)
+ for (i = 0; i < 5; i++)
+ if (vp56_rac_get_prob_branchy(&s->c, 252))
+ s->prob.p.comp[i] =
+ update_prob(&s->c, s->prob.p.comp[i]);
+ } else {
+ s->s.h.comppredmode = PRED_SINGLEREF;
+ }
+
+ if (s->s.h.comppredmode != PRED_COMPREF) {
+ for (i = 0; i < 5; i++) {
+ if (vp56_rac_get_prob_branchy(&s->c, 252))
+ s->prob.p.single_ref[i][0] =
+ update_prob(&s->c, s->prob.p.single_ref[i][0]);
+ if (vp56_rac_get_prob_branchy(&s->c, 252))
+ s->prob.p.single_ref[i][1] =
+ update_prob(&s->c, s->prob.p.single_ref[i][1]);
+ }
+ }
+
+ if (s->s.h.comppredmode != PRED_SINGLEREF) {
+ for (i = 0; i < 5; i++)
+ if (vp56_rac_get_prob_branchy(&s->c, 252))
+ s->prob.p.comp_ref[i] =
+ update_prob(&s->c, s->prob.p.comp_ref[i]);
+ }
+
+ for (i = 0; i < 4; i++)
+ for (j = 0; j < 9; j++)
+ if (vp56_rac_get_prob_branchy(&s->c, 252))
+ s->prob.p.y_mode[i][j] =
+ update_prob(&s->c, s->prob.p.y_mode[i][j]);
+
+ for (i = 0; i < 4; i++)
+ for (j = 0; j < 4; j++)
+ for (k = 0; k < 3; k++)
+ if (vp56_rac_get_prob_branchy(&s->c, 252))
+ s->prob.p.partition[3 - i][j][k] =
+ update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
+
+ // mv fields don't use the update_prob subexp model for some reason
+ for (i = 0; i < 3; i++)
+ if (vp56_rac_get_prob_branchy(&s->c, 252))
+ s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
+
+ for (i = 0; i < 2; i++) {
+ if (vp56_rac_get_prob_branchy(&s->c, 252))
+ s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
+
+ for (j = 0; j < 10; j++)
+ if (vp56_rac_get_prob_branchy(&s->c, 252))
+ s->prob.p.mv_comp[i].classes[j] =
+ (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
+
+ if (vp56_rac_get_prob_branchy(&s->c, 252))
+ s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
+
+ for (j = 0; j < 10; j++)
+ if (vp56_rac_get_prob_branchy(&s->c, 252))
+ s->prob.p.mv_comp[i].bits[j] =
+ (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
+ }
+
+ for (i = 0; i < 2; i++) {
+ for (j = 0; j < 2; j++)
+ for (k = 0; k < 3; k++)
+ if (vp56_rac_get_prob_branchy(&s->c, 252))
+ s->prob.p.mv_comp[i].class0_fp[j][k] =
+ (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
+
+ for (j = 0; j < 3; j++)
+ if (vp56_rac_get_prob_branchy(&s->c, 252))
+ s->prob.p.mv_comp[i].fp[j] =
+ (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
+ }
+
+ if (s->s.h.highprecisionmvs) {
+ for (i = 0; i < 2; i++) {
+ if (vp56_rac_get_prob_branchy(&s->c, 252))
+ s->prob.p.mv_comp[i].class0_hp =
+ (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
+
+ if (vp56_rac_get_prob_branchy(&s->c, 252))
+ s->prob.p.mv_comp[i].hp =
+ (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
+ }
+ }
+ }
+
+ return (data2 - data) + size2;
+}
+
+/* Restrict a motion vector to the currently legal range (s->min_mv ..
+ * s->max_mv) and store the result in dst. dst may alias src. */
+static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
+                                      VP9Context *s)
+{
+    int clamped_x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
+    int clamped_y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
+
+    dst->x = clamped_x;
+    dst->y = clamped_y;
+}
+
+/**
+ * Find a motion-vector predictor for the current block (s->b) and
+ * reference frame 'ref'.
+ *
+ * Candidates are probed in fixed priority order: MVs already decoded for
+ * earlier sub-blocks of this block (sub-8x8 only), spatial neighbours
+ * using the same reference, the co-located MV in the previous frame, and
+ * finally the same spatial/temporal candidates using a different
+ * reference (sign-flipped when the reference sign biases differ).  Falls
+ * back to a (0,0) vector if nothing matches.
+ *
+ * @param pmv  output: the predicted MV, clamped to the legal range
+ * @param ref  reference frame index the block uses
+ * @param z    which of the block's two per-sub-block MV slots to read
+ *             (0 = first reference, 1 = second reference)
+ * @param idx  0 = return the first candidate found, 1 = remember the
+ *             first and return the next distinct candidate
+ * @param sb   sub-block index (0..3) for sub-8x8 partitions, or -1
+ */
+static void find_ref_mvs(VP9Context *s,
+                         VP56mv *pmv, int ref, int z, int idx, int sb)
+{
+    /* For each block size: up to 8 neighbouring (col,row) offsets to
+     * probe, in decreasing priority. */
+    static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
+        [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
+                      { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
+        [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
+                      { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
+        [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
+                      { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
+        [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
+                      { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
+        [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
+                      { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
+        [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
+                      { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
+        [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
+                      { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
+        [BS_16x8]  = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
+                      { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
+        [BS_8x16]  = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
+                      { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
+        [BS_8x8]   = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
+                      { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
+        [BS_8x4]   = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
+                      { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
+        [BS_4x8]   = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
+                      { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
+        [BS_4x4]   = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
+                      { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
+    };
+    VP9Block *b = s->b;
+    int row = s->row, col = s->col, row7 = s->row7;
+    const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
+/* 0x8000,0x8000 cannot occur as a real clamped MV, so it marks "unset". */
+#define INVALID_MV 0x80008000U
+    uint32_t mem = INVALID_MV, mem_sub8x8 = INVALID_MV;
+    int i;
+
+    /* Accept candidate 'mv' WITHOUT clamping: return it immediately when
+     * idx == 0; otherwise remember the first candidate in 'mem' and
+     * return the next candidate that differs from it. */
+#define RETURN_DIRECT_MV(mv) \
+    do { \
+        uint32_t m = AV_RN32A(&mv); \
+        if (!idx) { \
+            AV_WN32A(pmv, m); \
+            return; \
+        } else if (mem == INVALID_MV) { \
+            mem = m; \
+        } else if (m != mem) { \
+            AV_WN32A(pmv, m); \
+            return; \
+        } \
+    } while (0)
+
+    /* Sub-8x8 partitions: MVs already decoded for earlier sub-blocks of
+     * this same block take the highest priority. */
+    if (sb >= 0) {
+        if (sb == 2 || sb == 1) {
+            RETURN_DIRECT_MV(b->mv[0][z]);
+        } else if (sb == 3) {
+            RETURN_DIRECT_MV(b->mv[2][z]);
+            RETURN_DIRECT_MV(b->mv[1][z]);
+            RETURN_DIRECT_MV(b->mv[0][z]);
+        }
+
+    /* Accept a neighbour candidate.  For sb > 0 we are looking for the
+     * second (NEARMV) predictor of a sub-block: compare against 'mem'
+     * using the clamped value but remember the raw value in mem_sub8x8;
+     * otherwise behave like RETURN_DIRECT_MV but clamp on output. */
+#define RETURN_MV(mv) \
+    do { \
+        if (sb > 0) { \
+            VP56mv tmp; \
+            uint32_t m; \
+            av_assert2(idx == 1); \
+            av_assert2(mem != INVALID_MV); \
+            if (mem_sub8x8 == INVALID_MV) { \
+                clamp_mv(&tmp, &mv, s); \
+                m = AV_RN32A(&tmp); \
+                if (m != mem) { \
+                    AV_WN32A(pmv, m); \
+                    return; \
+                } \
+                mem_sub8x8 = AV_RN32A(&mv); \
+            } else if (mem_sub8x8 != AV_RN32A(&mv)) { \
+                clamp_mv(&tmp, &mv, s); \
+                m = AV_RN32A(&tmp); \
+                if (m != mem) { \
+                    AV_WN32A(pmv, m); \
+                } else { \
+                    /* BUG I'm pretty sure this isn't the intention */ \
+                    AV_WN32A(pmv, 0); \
+                } \
+                return; \
+            } \
+        } else { \
+            uint32_t m = AV_RN32A(&mv); \
+            if (!idx) { \
+                clamp_mv(pmv, &mv, s); \
+                return; \
+            } else if (mem == INVALID_MV) { \
+                mem = m; \
+            } else if (m != mem) { \
+                clamp_mv(pmv, &mv, s); \
+                return; \
+            } \
+        } \
+    } while (0)
+
+        /* Directly above neighbour, if we are not in the first row. */
+        if (row > 0) {
+            struct VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
+            if (mv->ref[0] == ref) {
+                RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
+            } else if (mv->ref[1] == ref) {
+                RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
+            }
+        }
+        /* Left neighbour, if we are not at the tile's left edge. */
+        if (col > s->tile_col_start) {
+            struct VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
+            if (mv->ref[0] == ref) {
+                RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
+            } else if (mv->ref[1] == ref) {
+                RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
+            }
+        }
+        /* The above/left positions are table entries 0 and 1, already
+         * handled, so the scan below starts at 2. */
+        i = 2;
+    } else {
+        i = 0;
+    }
+
+    // previously coded MVs in this neighbourhood, using same reference frame
+    for (; i < 8; i++) {
+        int c = p[i][0] + col, r = p[i][1] + row;
+
+        if (c >= s->tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
+            struct VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
+
+            if (mv->ref[0] == ref) {
+                RETURN_MV(mv->mv[0]);
+            } else if (mv->ref[1] == ref) {
+                RETURN_MV(mv->mv[1]);
+            }
+        }
+    }
+
+    // MV at this position in previous frame, using same reference frame
+    if (s->s.h.use_last_frame_mvs) {
+        struct VP9mvrefPair *mv = &s->s.frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
+
+        /* In frame-threaded decoding, wait until the previous frame has
+         * decoded this row before reading its MVs. */
+        if (!s->s.frames[REF_FRAME_MVPAIR].uses_2pass)
+            ff_thread_await_progress(&s->s.frames[REF_FRAME_MVPAIR].tf, row >> 3, 0);
+        if (mv->ref[0] == ref) {
+            RETURN_MV(mv->mv[0]);
+        } else if (mv->ref[1] == ref) {
+            RETURN_MV(mv->mv[1]);
+        }
+    }
+
+    /* Candidate uses a different reference: negate the MV when the two
+     * references have opposite sign bias. */
+#define RETURN_SCALE_MV(mv, scale) \
+    do { \
+        if (scale) { \
+            VP56mv mv_temp = { -mv.x, -mv.y }; \
+            RETURN_MV(mv_temp); \
+        } else { \
+            RETURN_MV(mv); \
+        } \
+    } while (0)
+
+    // previously coded MVs in this neighbourhood, using different reference frame
+    for (i = 0; i < 8; i++) {
+        int c = p[i][0] + col, r = p[i][1] + row;
+
+        if (c >= s->tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
+            struct VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
+
+            if (mv->ref[0] != ref && mv->ref[0] >= 0) {
+                RETURN_SCALE_MV(mv->mv[0], s->s.h.signbias[mv->ref[0]] != s->s.h.signbias[ref]);
+            }
+            if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
+                // BUG - libvpx has this condition regardless of whether
+                // we used the first ref MV and pre-scaling
+                AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
+                RETURN_SCALE_MV(mv->mv[1], s->s.h.signbias[mv->ref[1]] != s->s.h.signbias[ref]);
+            }
+        }
+    }
+
+    // MV at this position in previous frame, using different reference frame
+    if (s->s.h.use_last_frame_mvs) {
+        struct VP9mvrefPair *mv = &s->s.frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
+
+        // no need to await_progress, because we already did that above
+        if (mv->ref[0] != ref && mv->ref[0] >= 0) {
+            RETURN_SCALE_MV(mv->mv[0], s->s.h.signbias[mv->ref[0]] != s->s.h.signbias[ref]);
+        }
+        if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
+            // BUG - libvpx has this condition regardless of whether
+            // we used the first ref MV and pre-scaling
+            AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
+            RETURN_SCALE_MV(mv->mv[1], s->s.h.signbias[mv->ref[1]] != s->s.h.signbias[ref]);
+        }
+    }
+
+    /* No candidate found anywhere: predict a (0,0) vector. */
+    AV_ZERO32(pmv);
+    clamp_mv(pmv, pmv, s);
+#undef INVALID_MV
+#undef RETURN_MV
+#undef RETURN_SCALE_MV
+}
+
+/**
+ * Decode one motion-vector component delta from the range coder.
+ *
+ * The wire layout is: sign bit, magnitude class, then either the class-0
+ * short form (small magnitudes) or class-dependent raw magnitude bits,
+ * followed by fractional-precision bits.  Every decoded symbol is also
+ * tallied in s->counts for backward probability adaptation.
+ *
+ * @param idx which mv_comp probability/count set to use (0 or 1; the
+ *            caller fill_mv() uses 0 for the row and 1 for the column)
+ * @param hp  whether the high-precision bit is present in the bitstream
+ * @return the signed component delta; its magnitude is always >= 1
+ */
+static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
+{
+    int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
+    int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
+                                s->prob.p.mv_comp[idx].classes);
+
+    s->counts.mv_comp[idx].sign[sign]++;
+    s->counts.mv_comp[idx].classes[c]++;
+    if (c) {
+        int m;
+
+        /* Class c > 0: read c raw magnitude bits (LSB first)... */
+        for (n = 0, m = 0; m < c; m++) {
+            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
+            n |= bit << m;
+            s->counts.mv_comp[idx].bits[m][bit]++;
+        }
+        /* ...then a tree-coded 2-bit fraction in bits 2..1... */
+        n <<= 3;
+        bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
+        n |= bit << 1;
+        s->counts.mv_comp[idx].fp[bit]++;
+        /* ...and finally the high-precision bit (bit 0), if coded. */
+        if (hp) {
+            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
+            s->counts.mv_comp[idx].hp[bit]++;
+            n |= bit;
+        } else {
+            n |= 1;
+            // bug in libvpx - we count for bw entropy purposes even if the
+            // bit wasn't coded
+            s->counts.mv_comp[idx].hp[1]++;
+        }
+        /* Add the base offset of magnitude class c. */
+        n += 8 << c;
+    } else {
+        /* Class 0: one integer bit, a 2-bit fraction, and the optional
+         * high-precision bit, each with class0-specific probabilities. */
+        n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
+        s->counts.mv_comp[idx].class0[n]++;
+        bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
+                               s->prob.p.mv_comp[idx].class0_fp[n]);
+        s->counts.mv_comp[idx].class0_fp[n][bit]++;
+        n = (n << 3) | (bit << 1);
+        if (hp) {
+            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
+            s->counts.mv_comp[idx].class0_hp[bit]++;
+            n |= bit;
+        } else {
+            n |= 1;
+            // bug in libvpx - we count for bw entropy purposes even if the
+            // bit wasn't coded
+            s->counts.mv_comp[idx].class0_hp[1]++;
+        }
+    }
+
+    /* Apply the sign; magnitude is n + 1, so the result is never 0. */
+    return sign ? -(n + 1) : (n + 1);
+}
+
+/**
+ * Compute the final motion vector(s) for the current block / sub-block.
+ *
+ * ZEROMV clears both MVs; NEARESTMV/NEARMV take a predictor from
+ * find_ref_mvs(); NEWMV takes the predictor and adds a coded delta read
+ * with read_mv_component().  mv[1] is filled the same way when the block
+ * uses compound prediction (b->comp).
+ *
+ * @param mv   output array of two MVs (second entry only used if b->comp)
+ * @param mode prediction mode (ZEROMV/NEARESTMV/NEARMV/NEWMV)
+ * @param sb   sub-block index for sub-8x8 partitions, or -1
+ */
+static void fill_mv(VP9Context *s,
+                    VP56mv *mv, int mode, int sb)
+{
+    VP9Block *b = s->b;
+
+    if (mode == ZEROMV) {
+        AV_ZERO64(mv);
+    } else {
+        int hp;
+
+        // FIXME cache this value and reuse for other subblocks
+        /* idx = (mode == NEARMV) selects the first or second candidate. */
+        find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
+                     mode == NEWMV ? -1 : sb);
+        // FIXME maybe move this code into find_ref_mvs()
+        /* NOTE: hp is assigned inside this condition.  When high precision
+         * is unavailable (disabled, or the predictor is large), round odd
+         * components of the predictor towards zero. */
+        if ((mode == NEWMV || sb == -1) &&
+            !(hp = s->s.h.highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
+            if (mv[0].y & 1) {
+                if (mv[0].y < 0)
+                    mv[0].y++;
+                else
+                    mv[0].y--;
+            }
+            if (mv[0].x & 1) {
+                if (mv[0].x < 0)
+                    mv[0].x++;
+                else
+                    mv[0].x--;
+            }
+        }
+        if (mode == NEWMV) {
+            /* The joint symbol says which components carry a coded delta. */
+            enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
+                                              s->prob.p.mv_joint);
+
+            s->counts.mv_joint[j]++;
+            if (j >= MV_JOINT_V)
+                mv[0].y += read_mv_component(s, 0, hp);
+            if (j & 1)
+                mv[0].x += read_mv_component(s, 1, hp);
+        }
+
+        /* Compound prediction: repeat the whole procedure for the second
+         * reference's MV. */
+        if (b->comp) {
+            // FIXME cache this value and reuse for other subblocks
+            find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
+                         mode == NEWMV ? -1 : sb);
+            if ((mode == NEWMV || sb == -1) &&
+                !(hp = s->s.h.highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
+                if (mv[1].y & 1) {
+                    if (mv[1].y < 0)
+                        mv[1].y++;
+                    else
+                        mv[1].y--;
+                }
+                if (mv[1].x & 1) {
+                    if (mv[1].x < 0)
+                        mv[1].x++;
+                    else
+                        mv[1].x--;
+                }
+            }
+            if (mode == NEWMV) {
+                enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
+                                                  s->prob.p.mv_joint);
+
+                s->counts.mv_joint[j]++;
+                if (j >= MV_JOINT_V)
+                    mv[1].y += read_mv_component(s, 0, hp);
+                if (j & 1)
+                    mv[1].x += read_mv_component(s, 1, hp);
+            }
+        }
+    }
+}
+
+/* Fill a w x h rectangle of a byte-sized context map with value v.  The
+ * byte is replicated across a word so each row is written with a single
+ * aligned store; w is one of 1, 2, 4 or 8, and h is at least 1. */
+static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
+                                       ptrdiff_t stride, int v)
+{
+    if (w == 1) {
+        do {
+            *ptr = v;
+            ptr += stride;
+        } while (--h);
+    } else if (w == 2) {
+        int rep16 = v * 0x0101;
+        do {
+            AV_WN16A(ptr, rep16);
+            ptr += stride;
+        } while (--h);
+    } else if (w == 4) {
+        uint32_t rep32 = v * 0x01010101;
+        do {
+            AV_WN32A(ptr, rep32);
+            ptr += stride;
+        } while (--h);
+    } else if (w == 8) {
+#if HAVE_FAST_64BIT
+        uint64_t rep64 = v * 0x0101010101010101ULL;
+        do {
+            AV_WN64A(ptr, rep64);
+            ptr += stride;
+        } while (--h);
+#else
+        /* No fast 64-bit stores: write each row as two 32-bit halves. */
+        uint32_t rep32 = v * 0x01010101;
+        do {
+            AV_WN32A(ptr, rep32);
+            AV_WN32A(ptr + 4, rep32);
+            ptr += stride;
+        } while (--h);
+#endif
+    }
+}
+
+static void decode_mode(AVCodecContext *ctx)
+{
+ static const uint8_t left_ctx[N_BS_SIZES] = {
+ 0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
+ };
+ static const uint8_t above_ctx[N_BS_SIZES] = {
+ 0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
+ };
+ static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
+ TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
+ TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
+ };
+ VP9Context *s = ctx->priv_data;
+ VP9Block *b = s->b;
+ int row = s->row, col = s->col, row7 = s->row7;
+ enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
+ int bw4 = bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
+ int bh4 = bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
+ int have_a = row > 0, have_l = col > s->tile_col_start;
+ int vref, filter_id;
+
+ if (!s->s.h.segmentation.enabled) {
+ b->seg_id = 0;
+ } else if (s->s.h.keyframe || s->s.h.intraonly) {
+ b->seg_id = !s->s.h.segmentation.update_map ? 0 :
+ vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->s.h.segmentation.prob);
+ } else if (!s->s.h.segmentation.update_map ||
+ (s->s.h.segmentation.temporal &&
+ vp56_rac_get_prob_branchy(&s->c,
+ s->s.h.segmentation.pred_prob[s->above_segpred_ctx[col] +
+ s->left_segpred_ctx[row7]]))) {
+ if (!s->s.h.errorres && s->s.frames[REF_FRAME_SEGMAP].segmentation_map) {
+ int pred = 8, x;
+ uint8_t *refsegmap = s->s.frames[REF_FRAME_SEGMAP].segmentation_map;
+
+ if (!s->s.frames[REF_FRAME_SEGMAP].uses_2pass)
+ ff_thread_await_progress(&s->s.frames[REF_FRAME_SEGMAP].tf, row >> 3, 0);
+ for (y = 0; y < h4; y++) {
+ int idx_base = (y + row) * 8 * s->sb_cols + col;
+ for (x = 0; x < w4; x++)
+ pred = FFMIN(pred, refsegmap[idx_base + x]);
+ }
+ av_assert1(pred < 8);
+ b->seg_id = pred;
+ } else {
+ b->seg_id = 0;
+ }
+
+ memset(&s->above_segpred_ctx[col], 1, w4);
+ memset(&s->left_segpred_ctx[row7], 1, h4);
+ } else {
+ b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
+ s->s.h.segmentation.prob);
+
+ memset(&s->above_segpred_ctx[col], 0, w4);
+ memset(&s->left_segpred_ctx[row7], 0, h4);
+ }
+ if (s->s.h.segmentation.enabled &&
+ (s->s.h.segmentation.update_map || s->s.h.keyframe || s->s.h.intraonly)) {
+ setctx_2d(&s->s.frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
+ bw4, bh4, 8 * s->sb_cols, b->seg_id);
+ }
+
+ b->skip = s->s.h.segmentation.enabled &&
+ s->s.h.segmentation.feat[b->seg_id].skip_enabled;
+ if (!b->skip) {
+ int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
+ b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
+ s->counts.skip[c][b->skip]++;
+ }
+
+ if (s->s.h.keyframe || s->s.h.intraonly) {
+ b->intra = 1;
+ } else if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].ref_enabled) {
+ b->intra = !s->s.h.segmentation.feat[b->seg_id].ref_val;
+ } else {
+ int c, bit;
+
+ if (have_a && have_l) {
+ c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
+ c += (c == 2);
+ } else {
+ c = have_a ? 2 * s->above_intra_ctx[col] :
+ have_l ? 2 * s->left_intra_ctx[row7] : 0;
+ }
+ bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
+ s->counts.intra[c][bit]++;
+ b->intra = !bit;
+ }
+
+ if ((b->intra || !b->skip) && s->s.h.txfmmode == TX_SWITCHABLE) {
+ int c;
+ if (have_a) {
+ if (have_l) {
+ c = (s->above_skip_ctx[col] ? max_tx :
+ s->above_txfm_ctx[col]) +
+ (s->left_skip_ctx[row7] ? max_tx :
+ s->left_txfm_ctx[row7]) > max_tx;
+ } else {
+ c = s->above_skip_ctx[col] ? 1 :
+ (s->above_txfm_ctx[col] * 2 > max_tx);
+ }
+ } else if (have_l) {
+ c = s->left_skip_ctx[row7] ? 1 :
+ (s->left_txfm_ctx[row7] * 2 > max_tx);
+ } else {
+ c = 1;
+ }
+ switch (max_tx) {
+ case TX_32X32:
+ b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
+ if (b->tx) {
+ b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
+ if (b->tx == 2)
+ b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
+ }
+ s->counts.tx32p[c][b->tx]++;
+ break;
+ case TX_16X16:
+ b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
+ if (b->tx)
+ b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
+ s->counts.tx16p[c][b->tx]++;
+ break;
+ case TX_8X8:
+ b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
+ s->counts.tx8p[c][b->tx]++;
+ break;
+ case TX_4X4:
+ b->tx = TX_4X4;
+ break;
+ }
+ } else {
+ b->tx = FFMIN(max_tx, s->s.h.txfmmode);
+ }
+
+ if (s->s.h.keyframe || s->s.h.intraonly) {
+ uint8_t *a = &s->above_mode_ctx[col * 2];
+ uint8_t *l = &s->left_mode_ctx[(row7) << 1];
+
+ b->comp = 0;
+ if (b->bs > BS_8x8) {
+ // FIXME the memory storage intermediates here aren't really
+ // necessary, they're just there to make the code slightly
+ // simpler for now
+ b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
+ vp9_default_kf_ymode_probs[a[0]][l[0]]);
+ if (b->bs != BS_8x4) {
+ b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
+ vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
+ l[0] = a[1] = b->mode[1];
+ } else {
+ l[0] = a[1] = b->mode[1] = b->mode[0];
+ }
+ if (b->bs != BS_4x8) {
+ b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
+ vp9_default_kf_ymode_probs[a[0]][l[1]]);
+ if (b->bs != BS_8x4) {
+ b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
+ vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
+ l[1] = a[1] = b->mode[3];
+ } else {
+ l[1] = a[1] = b->mode[3] = b->mode[2];
+ }
+ } else {
+ b->mode[2] = b->mode[0];
+ l[1] = a[1] = b->mode[3] = b->mode[1];
+ }
+ } else {
+ b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
+ vp9_default_kf_ymode_probs[*a][*l]);
+ b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
+ // FIXME this can probably be optimized
+ memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
+ memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
+ }
+ b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
+ vp9_default_kf_uvmode_probs[b->mode[3]]);
+ } else if (b->intra) {
+ b->comp = 0;
+ if (b->bs > BS_8x8) {
+ b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
+ s->prob.p.y_mode[0]);
+ s->counts.y_mode[0][b->mode[0]]++;
+ if (b->bs != BS_8x4) {
+ b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
+ s->prob.p.y_mode[0]);
+ s->counts.y_mode[0][b->mode[1]]++;
+ } else {
+ b->mode[1] = b->mode[0];
+ }
+ if (b->bs != BS_4x8) {
+ b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
+ s->prob.p.y_mode[0]);
+ s->counts.y_mode[0][b->mode[2]]++;
+ if (b->bs != BS_8x4) {
+ b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
+ s->prob.p.y_mode[0]);
+ s->counts.y_mode[0][b->mode[3]]++;
+ } else {
+ b->mode[3] = b->mode[2];
+ }
+ } else {
+ b->mode[2] = b->mode[0];
+ b->mode[3] = b->mode[1];
+ }
+ } else {
+ static const uint8_t size_group[10] = {
+ 3, 3, 3, 3, 2, 2, 2, 1, 1, 1
+ };
+ int sz = size_group[b->bs];
+
+ b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
+ s->prob.p.y_mode[sz]);
+ b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
+ s->counts.y_mode[sz][b->mode[3]]++;
+ }
+ b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
+ s->prob.p.uv_mode[b->mode[3]]);
+ s->counts.uv_mode[b->mode[3]][b->uvmode]++;
+ } else {
+ static const uint8_t inter_mode_ctx_lut[14][14] = {
+ { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
+ { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
+ { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
+ { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
+ { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
+ { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
+ { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
+ { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
+ { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
+ { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
+ { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
+ { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
+ { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
+ { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
+ };
+
+ if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].ref_enabled) {
+ av_assert2(s->s.h.segmentation.feat[b->seg_id].ref_val != 0);
+ b->comp = 0;
+ b->ref[0] = s->s.h.segmentation.feat[b->seg_id].ref_val - 1;
+ } else {
+ // read comp_pred flag
+ if (s->s.h.comppredmode != PRED_SWITCHABLE) {
+ b->comp = s->s.h.comppredmode == PRED_COMPREF;
+ } else {
+ int c;
+
+ // FIXME add intra as ref=0xff (or -1) to make these easier?
+ if (have_a) {
+ if (have_l) {
+ if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
+ c = 4;
+ } else if (s->above_comp_ctx[col]) {
+ c = 2 + (s->left_intra_ctx[row7] ||
+ s->left_ref_ctx[row7] == s->s.h.fixcompref);
+ } else if (s->left_comp_ctx[row7]) {
+ c = 2 + (s->above_intra_ctx[col] ||
+ s->above_ref_ctx[col] == s->s.h.fixcompref);
+ } else {
+ c = (!s->above_intra_ctx[col] &&
+ s->above_ref_ctx[col] == s->s.h.fixcompref) ^
+ (!s->left_intra_ctx[row7] &&
+ s->left_ref_ctx[row & 7] == s->s.h.fixcompref);
+ }
+ } else {
+ c = s->above_comp_ctx[col] ? 3 :
+ (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->s.h.fixcompref);
+ }
+ } else if (have_l) {
+ c = s->left_comp_ctx[row7] ? 3 :
+ (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->s.h.fixcompref);
+ } else {
+ c = 1;
+ }
+ b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
+ s->counts.comp[c][b->comp]++;
+ }
+
+ // read actual references
+ // FIXME probably cache a few variables here to prevent repetitive
+ // memory accesses below
+ if (b->comp) /* two references */ {
+ int fix_idx = s->s.h.signbias[s->s.h.fixcompref], var_idx = !fix_idx, c, bit;
+
+ b->ref[fix_idx] = s->s.h.fixcompref;
+ // FIXME can this codeblob be replaced by some sort of LUT?
+ if (have_a) {
+ if (have_l) {
+ if (s->above_intra_ctx[col]) {
+ if (s->left_intra_ctx[row7]) {
+ c = 2;
+ } else {
+ c = 1 + 2 * (s->left_ref_ctx[row7] != s->s.h.varcompref[1]);
+ }
+ } else if (s->left_intra_ctx[row7]) {
+ c = 1 + 2 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
+ } else {
+ int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
+
+ if (refl == refa && refa == s->s.h.varcompref[1]) {
+ c = 0;
+ } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
+ if ((refa == s->s.h.fixcompref && refl == s->s.h.varcompref[0]) ||
+ (refl == s->s.h.fixcompref && refa == s->s.h.varcompref[0])) {
+ c = 4;
+ } else {
+ c = (refa == refl) ? 3 : 1;
+ }
+ } else if (!s->left_comp_ctx[row7]) {
+ if (refa == s->s.h.varcompref[1] && refl != s->s.h.varcompref[1]) {
+ c = 1;
+ } else {
+ c = (refl == s->s.h.varcompref[1] &&
+ refa != s->s.h.varcompref[1]) ? 2 : 4;
+ }
+ } else if (!s->above_comp_ctx[col]) {
+ if (refl == s->s.h.varcompref[1] && refa != s->s.h.varcompref[1]) {
+ c = 1;
+ } else {
+ c = (refa == s->s.h.varcompref[1] &&
+ refl != s->s.h.varcompref[1]) ? 2 : 4;
+ }
+ } else {
+ c = (refl == refa) ? 4 : 2;
+ }
+ }
+ } else {
+ if (s->above_intra_ctx[col]) {
+ c = 2;
+ } else if (s->above_comp_ctx[col]) {
+ c = 4 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
+ } else {
+ c = 3 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
+ }
+ }
+ } else if (have_l) {
+ if (s->left_intra_ctx[row7]) {
+ c = 2;
+ } else if (s->left_comp_ctx[row7]) {
+ c = 4 * (s->left_ref_ctx[row7] != s->s.h.varcompref[1]);
+ } else {
+ c = 3 * (s->left_ref_ctx[row7] != s->s.h.varcompref[1]);
+ }
+ } else {
+ c = 2;
+ }
+ bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
+ b->ref[var_idx] = s->s.h.varcompref[bit];
+ s->counts.comp_ref[c][bit]++;
+ } else /* single reference */ {
+ int bit, c;
+
+ if (have_a && !s->above_intra_ctx[col]) {
+ if (have_l && !s->left_intra_ctx[row7]) {
+ if (s->left_comp_ctx[row7]) {
+ if (s->above_comp_ctx[col]) {
+ c = 1 + (!s->s.h.fixcompref || !s->left_ref_ctx[row7] ||
+ !s->above_ref_ctx[col]);
+ } else {
+ c = (3 * !s->above_ref_ctx[col]) +
+ (!s->s.h.fixcompref || !s->left_ref_ctx[row7]);
+ }
+ } else if (s->above_comp_ctx[col]) {
+ c = (3 * !s->left_ref_ctx[row7]) +
+ (!s->s.h.fixcompref || !s->above_ref_ctx[col]);
+ } else {
+ c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
+ }
+ } else if (s->above_intra_ctx[col]) {
+ c = 2;
+ } else if (s->above_comp_ctx[col]) {
+ c = 1 + (!s->s.h.fixcompref || !s->above_ref_ctx[col]);
+ } else {
+ c = 4 * (!s->above_ref_ctx[col]);
+ }
+ } else if (have_l && !s->left_intra_ctx[row7]) {
+ if (s->left_intra_ctx[row7]) {
+ c = 2;
+ } else if (s->left_comp_ctx[row7]) {
+ c = 1 + (!s->s.h.fixcompref || !s->left_ref_ctx[row7]);
+ } else {
+ c = 4 * (!s->left_ref_ctx[row7]);
+ }
+ } else {
+ c = 2;
+ }
+ bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
+ s->counts.single_ref[c][0][bit]++;
+ if (!bit) {
+ b->ref[0] = 0;
+ } else {
+ // FIXME can this codeblob be replaced by some sort of LUT?
+ if (have_a) {
+ if (have_l) {
+ if (s->left_intra_ctx[row7]) {
+ if (s->above_intra_ctx[col]) {
+ c = 2;
+ } else if (s->above_comp_ctx[col]) {
+ c = 1 + 2 * (s->s.h.fixcompref == 1 ||
+ s->above_ref_ctx[col] == 1);
+ } else if (!s->above_ref_ctx[col]) {
+ c = 3;
+ } else {
+ c = 4 * (s->above_ref_ctx[col] == 1);
+ }
+ } else if (s->above_intra_ctx[col]) {
+ if (s->left_intra_ctx[row7]) {
+ c = 2;
+ } else if (s->left_comp_ctx[row7]) {
+ c = 1 + 2 * (s->s.h.fixcompref == 1 ||
+ s->left_ref_ctx[row7] == 1);
+ } else if (!s->left_ref_ctx[row7]) {
+ c = 3;
+ } else {
+ c = 4 * (s->left_ref_ctx[row7] == 1);
+ }
+ } else if (s->above_comp_ctx[col]) {
+ if (s->left_comp_ctx[row7]) {
+ if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
+ c = 3 * (s->s.h.fixcompref == 1 ||
+ s->left_ref_ctx[row7] == 1);
+ } else {
+ c = 2;
+ }
+ } else if (!s->left_ref_ctx[row7]) {
+ c = 1 + 2 * (s->s.h.fixcompref == 1 ||
+ s->above_ref_ctx[col] == 1);
+ } else {
+ c = 3 * (s->left_ref_ctx[row7] == 1) +
+ (s->s.h.fixcompref == 1 || s->above_ref_ctx[col] == 1);
+ }
+ } else if (s->left_comp_ctx[row7]) {
+ if (!s->above_ref_ctx[col]) {
+ c = 1 + 2 * (s->s.h.fixcompref == 1 ||
+ s->left_ref_ctx[row7] == 1);
+ } else {
+ c = 3 * (s->above_ref_ctx[col] == 1) +
+ (s->s.h.fixcompref == 1 || s->left_ref_ctx[row7] == 1);
+ }
+ } else if (!s->above_ref_ctx[col]) {
+ if (!s->left_ref_ctx[row7]) {
+ c = 3;
+ } else {
+ c = 4 * (s->left_ref_ctx[row7] == 1);
+ }
+ } else if (!s->left_ref_ctx[row7]) {
+ c = 4 * (s->above_ref_ctx[col] == 1);
+ } else {
+ c = 2 * (s->left_ref_ctx[row7] == 1) +
+ 2 * (s->above_ref_ctx[col] == 1);
+ }
+ } else {
+ if (s->above_intra_ctx[col] ||
+ (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
+ c = 2;
+ } else if (s->above_comp_ctx[col]) {
+ c = 3 * (s->s.h.fixcompref == 1 || s->above_ref_ctx[col] == 1);
+ } else {
+ c = 4 * (s->above_ref_ctx[col] == 1);
+ }
+ }
+ } else if (have_l) {
+ if (s->left_intra_ctx[row7] ||
+ (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
+ c = 2;
+ } else if (s->left_comp_ctx[row7]) {
+ c = 3 * (s->s.h.fixcompref == 1 || s->left_ref_ctx[row7] == 1);
+ } else {
+ c = 4 * (s->left_ref_ctx[row7] == 1);
+ }
+ } else {
+ c = 2;
+ }
+ bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
+ s->counts.single_ref[c][1][bit]++;
+ b->ref[0] = 1 + bit;
+ }
+ }
+ }
+
+ if (b->bs <= BS_8x8) {
+ if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].skip_enabled) {
+ b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
+ } else {
+ static const uint8_t off[10] = {
+ 3, 0, 0, 1, 0, 0, 0, 0, 0, 0
+ };
+
+ // FIXME this needs to use the LUT tables from find_ref_mvs
+ // because not all are -1,0/0,-1
+ int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
+ [s->left_mode_ctx[row7 + off[b->bs]]];
+
+ b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
+ s->prob.p.mv_mode[c]);
+ b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
+ s->counts.mv_mode[c][b->mode[0] - 10]++;
+ }
+ }
+
+ if (s->s.h.filtermode == FILTER_SWITCHABLE) {
+ int c;
+
+ if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
+ if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
+ c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
+ s->left_filter_ctx[row7] : 3;
+ } else {
+ c = s->above_filter_ctx[col];
+ }
+ } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
+ c = s->left_filter_ctx[row7];
+ } else {
+ c = 3;
+ }
+
+ filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
+ s->prob.p.filter[c]);
+ s->counts.filter[c][filter_id]++;
+ b->filter = vp9_filter_lut[filter_id];
+ } else {
+ b->filter = s->s.h.filtermode;
+ }
+
+ if (b->bs > BS_8x8) {
+ int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
+
+ b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
+ s->prob.p.mv_mode[c]);
+ s->counts.mv_mode[c][b->mode[0] - 10]++;
+ fill_mv(s, b->mv[0], b->mode[0], 0);
+
+ if (b->bs != BS_8x4) {
+ b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
+ s->prob.p.mv_mode[c]);
+ s->counts.mv_mode[c][b->mode[1] - 10]++;
+ fill_mv(s, b->mv[1], b->mode[1], 1);
+ } else {
+ b->mode[1] = b->mode[0];
+ AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
+ AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
+ }
+
+ if (b->bs != BS_4x8) {
+ b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
+ s->prob.p.mv_mode[c]);
+ s->counts.mv_mode[c][b->mode[2] - 10]++;
+ fill_mv(s, b->mv[2], b->mode[2], 2);
+
+ if (b->bs != BS_8x4) {
+ b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
+ s->prob.p.mv_mode[c]);
+ s->counts.mv_mode[c][b->mode[3] - 10]++;
+ fill_mv(s, b->mv[3], b->mode[3], 3);
+ } else {
+ b->mode[3] = b->mode[2];
+ AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
+ AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
+ }
+ } else {
+ b->mode[2] = b->mode[0];
+ AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
+ AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
+ b->mode[3] = b->mode[1];
+ AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
+ AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
+ }
+ } else {
+ fill_mv(s, b->mv[0], b->mode[0], -1);
+ AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
+ AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
+ AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
+ AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
+ AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
+ AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
+ }
+
+ vref = b->ref[b->comp ? s->s.h.signbias[s->s.h.varcompref[0]] : 0];
+ }
+
+#if HAVE_FAST_64BIT
+#define SPLAT_CTX(var, val, n) \
+ switch (n) { \
+ case 1: var = val; break; \
+ case 2: AV_WN16A(&var, val * 0x0101); break; \
+ case 4: AV_WN32A(&var, val * 0x01010101); break; \
+ case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
+ case 16: { \
+ uint64_t v64 = val * 0x0101010101010101ULL; \
+ AV_WN64A( &var, v64); \
+ AV_WN64A(&((uint8_t *) &var)[8], v64); \
+ break; \
+ } \
+ }
+#else
+#define SPLAT_CTX(var, val, n) \
+ switch (n) { \
+ case 1: var = val; break; \
+ case 2: AV_WN16A(&var, val * 0x0101); break; \
+ case 4: AV_WN32A(&var, val * 0x01010101); break; \
+ case 8: { \
+ uint32_t v32 = val * 0x01010101; \
+ AV_WN32A( &var, v32); \
+ AV_WN32A(&((uint8_t *) &var)[4], v32); \
+ break; \
+ } \
+ case 16: { \
+ uint32_t v32 = val * 0x01010101; \
+ AV_WN32A( &var, v32); \
+ AV_WN32A(&((uint8_t *) &var)[4], v32); \
+ AV_WN32A(&((uint8_t *) &var)[8], v32); \
+ AV_WN32A(&((uint8_t *) &var)[12], v32); \
+ break; \
+ } \
+ }
+#endif
+
+ switch (bwh_tab[1][b->bs][0]) {
+#define SET_CTXS(dir, off, n) \
+ do { \
+ SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
+ SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
+ SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
+ if (!s->s.h.keyframe && !s->s.h.intraonly) { \
+ SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
+ SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
+ SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
+ if (!b->intra) { \
+ SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
+ if (s->s.h.filtermode == FILTER_SWITCHABLE) { \
+ SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
+ } \
+ } \
+ } \
+ } while (0)
+ case 1: SET_CTXS(above, col, 1); break;
+ case 2: SET_CTXS(above, col, 2); break;
+ case 4: SET_CTXS(above, col, 4); break;
+ case 8: SET_CTXS(above, col, 8); break;
+ }
+ switch (bwh_tab[1][b->bs][1]) {
+ case 1: SET_CTXS(left, row7, 1); break;
+ case 2: SET_CTXS(left, row7, 2); break;
+ case 4: SET_CTXS(left, row7, 4); break;
+ case 8: SET_CTXS(left, row7, 8); break;
+ }
+#undef SPLAT_CTX
+#undef SET_CTXS
+
+ if (!s->s.h.keyframe && !s->s.h.intraonly) {
+ if (b->bs > BS_8x8) {
+ int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
+
+ AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
+ AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
+ AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
+ AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
+ AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
+ AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
+ AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
+ AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
+ } else {
+ int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
+
+ for (n = 0; n < w4 * 2; n++) {
+ AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
+ AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
+ }
+ for (n = 0; n < h4 * 2; n++) {
+ AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
+ AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
+ }
+ }
+ }
+
+ // FIXME kinda ugly
+ for (y = 0; y < h4; y++) {
+ int x, o = (row + y) * s->sb_cols * 8 + col;
+ struct VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[o];
+
+ if (b->intra) {
+ for (x = 0; x < w4; x++) {
+ mv[x].ref[0] =
+ mv[x].ref[1] = -1;
+ }
+ } else if (b->comp) {
+ for (x = 0; x < w4; x++) {
+ mv[x].ref[0] = b->ref[0];
+ mv[x].ref[1] = b->ref[1];
+ AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
+ AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
+ }
+ } else {
+ for (x = 0; x < w4; x++) {
+ mv[x].ref[0] = b->ref[0];
+ mv[x].ref[1] = -1;
+ AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
+ }
+ }
+ }
+}
+
+// FIXME merge cnt/eob arguments?
+/**
+ * Decode the coefficient tokens of one transform block from the range coder.
+ *
+ * @param c            range coder to read from
+ * @param coef         output coefficients; int16_t for 8bpp, otherwise each
+ *                     value is stored as a 32-bit write spanning two slots
+ * @param n_coeffs     number of coefficients for this transform size
+ * @param is_tx32x32   nonzero for 32x32 transforms (dequantized value halved)
+ * @param is8bitsperpixel selects the narrow coefficient store path
+ * @param bpp          bit depth; only adds extra magnitude bits when >8
+ * @param cnt,eob      adaptivity counters, indexed [band][nnz-ctx][token]
+ * @param p            token probabilities, indexed [band][nnz-ctx][token]
+ * @param nnz          initial nonzero context from the above/left neighbours
+ * @param scan,nb      scan order and per-position neighbour pairs
+ * @param band_counts  number of scan positions in each probability band
+ * @param qmul         {dc, ac} dequantization multipliers
+ * @return the number of coefficients decoded (EOB position)
+ */
+static av_always_inline int
+decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
+                        int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3],
+                        unsigned (*eob)[6][2], uint8_t (*p)[6][11],
+                        int nnz, const int16_t *scan, const int16_t (*nb)[2],
+                        const int16_t *band_counts, const int16_t *qmul)
+{
+    int i = 0, band = 0, band_left = band_counts[band];
+    uint8_t *tp = p[0][nnz];
+    // per-position magnitude class (0-5); the two scan neighbours' classes
+    // are averaged below to form the nnz context of later positions
+    uint8_t cache[1024];
+
+    do {
+        int val, rc;
+
+        val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
+        eob[band][nnz][val]++;
+        if (!val)
+            break;
+
+    skip_eob:
+        if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
+            cnt[band][nnz][0]++;
+            if (!--band_left)
+                band_left = band_counts[++band];
+            cache[scan[i]] = 0;
+            nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
+            tp = p[band][nnz];
+            if (++i == n_coeffs)
+                break; //invalid input; blocks should end with EOB
+            // after a zero run entry, the next symbol cannot be EOB again
+            goto skip_eob;
+        }
+
+        rc = scan[i];
+        if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
+            cnt[band][nnz][1]++;
+            val = 1;
+            cache[rc] = 1;
+        } else {
+            // fill in p[3-10] (model fill) - only once per frame for each pos
+            if (!tp[3])
+                memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
+
+            cnt[band][nnz][2]++;
+            if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
+                if (!vp56_rac_get_prob_branchy(c, tp[4])) {
+                    cache[rc] = val = 2;
+                } else {
+                    val = 3 + vp56_rac_get_prob(c, tp[5]);
+                    cache[rc] = 3;
+                }
+            } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
+                cache[rc] = 4;
+                if (!vp56_rac_get_prob_branchy(c, tp[7])) {
+                    val = 5 + vp56_rac_get_prob(c, 159);
+                } else {
+                    val = 7 + (vp56_rac_get_prob(c, 165) << 1);
+                    val += vp56_rac_get_prob(c, 145);
+                }
+            } else { // cat 3-6
+                cache[rc] = 5;
+                if (!vp56_rac_get_prob_branchy(c, tp[8])) {
+                    if (!vp56_rac_get_prob_branchy(c, tp[9])) {
+                        val = 11 + (vp56_rac_get_prob(c, 173) << 2);
+                        val += (vp56_rac_get_prob(c, 148) << 1);
+                        val += vp56_rac_get_prob(c, 140);
+                    } else {
+                        val = 19 + (vp56_rac_get_prob(c, 176) << 3);
+                        val += (vp56_rac_get_prob(c, 155) << 2);
+                        val += (vp56_rac_get_prob(c, 140) << 1);
+                        val += vp56_rac_get_prob(c, 135);
+                    }
+                } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
+                    val = 35 + (vp56_rac_get_prob(c, 180) << 4);
+                    val += (vp56_rac_get_prob(c, 157) << 3);
+                    val += (vp56_rac_get_prob(c, 141) << 2);
+                    val += (vp56_rac_get_prob(c, 134) << 1);
+                    val += vp56_rac_get_prob(c, 130);
+                } else {
+                    val = 67;
+                    // high bit depths carry up to 4 extra magnitude bits
+                    if (!is8bitsperpixel) {
+                        if (bpp == 12) {
+                            val += vp56_rac_get_prob(c, 255) << 17;
+                            val += vp56_rac_get_prob(c, 255) << 16;
+                        }
+                        val += (vp56_rac_get_prob(c, 255) << 15);
+                        val += (vp56_rac_get_prob(c, 255) << 14);
+                    }
+                    val += (vp56_rac_get_prob(c, 254) << 13);
+                    val += (vp56_rac_get_prob(c, 254) << 12);
+                    val += (vp56_rac_get_prob(c, 254) << 11);
+                    val += (vp56_rac_get_prob(c, 252) << 10);
+                    val += (vp56_rac_get_prob(c, 249) << 9);
+                    val += (vp56_rac_get_prob(c, 243) << 8);
+                    val += (vp56_rac_get_prob(c, 230) << 7);
+                    val += (vp56_rac_get_prob(c, 196) << 6);
+                    val += (vp56_rac_get_prob(c, 177) << 5);
+                    val += (vp56_rac_get_prob(c, 153) << 4);
+                    val += (vp56_rac_get_prob(c, 140) << 3);
+                    val += (vp56_rac_get_prob(c, 133) << 2);
+                    val += (vp56_rac_get_prob(c, 130) << 1);
+                    val += vp56_rac_get_prob(c, 129);
+                }
+            }
+        }
+#define STORE_COEF(c, i, v) do { \
+    if (is8bitsperpixel) { \
+        c[i] = v; \
+    } else { \
+        AV_WN32A(&c[i * 2], v); \
+    } \
+} while (0)
+        if (!--band_left)
+            band_left = band_counts[++band];
+        // read sign bit, dequantize (dc uses qmul[0], ac qmul[1]); the
+        // 32x32 transform result is halved
+        if (is_tx32x32)
+            STORE_COEF(coef, rc, ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2);
+        else
+            STORE_COEF(coef, rc, (vp8_rac_get(c) ? -val : val) * qmul[!!i]);
+        nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
+        tp = p[band][nnz];
+    } while (++i < n_coeffs);
+
+    return i;
+}
+
+// 8bpp, non-32x32 specialization of decode_coeffs_b_generic()
+static int decode_coeffs_b_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
+                                unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
+                                uint8_t (*p)[6][11], int nnz, const int16_t *scan,
+                                const int16_t (*nb)[2], const int16_t *band_counts,
+                                const int16_t *qmul)
+{
+    return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 1, 8, cnt, eob, p,
+                                   nnz, scan, nb, band_counts, qmul);
+}
+
+// 8bpp, 32x32-transform specialization of decode_coeffs_b_generic()
+static int decode_coeffs_b32_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
+                                  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
+                                  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
+                                  const int16_t (*nb)[2], const int16_t *band_counts,
+                                  const int16_t *qmul)
+{
+    return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 1, 8, cnt, eob, p,
+                                   nnz, scan, nb, band_counts, qmul);
+}
+
+// high bit depth (actual depth taken from s->bpp), non-32x32 specialization
+static int decode_coeffs_b_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
+                                 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
+                                 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
+                                 const int16_t (*nb)[2], const int16_t *band_counts,
+                                 const int16_t *qmul)
+{
+    return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->bpp, cnt, eob, p,
+                                   nnz, scan, nb, band_counts, qmul);
+}
+
+// high bit depth, 32x32-transform specialization of decode_coeffs_b_generic()
+static int decode_coeffs_b32_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
+                                   unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
+                                   uint8_t (*p)[6][11], int nnz, const int16_t *scan,
+                                   const int16_t (*nb)[2], const int16_t *band_counts,
+                                   const int16_t *qmul)
+{
+    return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->bpp, cnt, eob, p,
+                                   nnz, scan, nb, band_counts, qmul);
+}
+
+/**
+ * Decode all coefficient blocks of the current block (luma, then both
+ * chroma planes), storing per-transform-block EOBs in s->eob/s->uveob and
+ * updating the above/left nonzero-context lines.
+ *
+ * @param is8bitsperpixel compile-time constant selecting the 8bpp path
+ * @return nonzero if any transform block contained coefficients
+ */
+static av_always_inline int decode_coeffs(AVCodecContext *ctx, int is8bitsperpixel)
+{
+    VP9Context *s = ctx->priv_data;
+    VP9Block *b = s->b;
+    int row = s->row, col = s->col;
+    uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
+    unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
+    unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
+    int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
+    int end_x = FFMIN(2 * (s->cols - col), w4);
+    int end_y = FFMIN(2 * (s->rows - row), h4);
+    int n, pl, x, y, res;
+    int16_t (*qmul)[2] = s->s.h.segmentation.feat[b->seg_id].qmul;
+    int tx = 4 * s->s.h.lossless + b->tx;
+    const int16_t * const *yscans = vp9_scans[tx];
+    const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
+    const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
+    const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
+    uint8_t *a = &s->above_y_nnz_ctx[col * 2];
+    uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
+    // positions per probability band, per transform size (TX_4X4..TX_32X32)
+    static const int16_t band_counts[4][8] = {
+        { 1, 2, 3, 4,  3,   16 - 13 },
+        { 1, 2, 3, 4, 11,   64 - 21 },
+        { 1, 2, 3, 4, 11,  256 - 21 },
+        { 1, 2, 3, 4, 11, 1024 - 21 },
+    };
+    const int16_t *y_band_counts = band_counts[b->tx];
+    const int16_t *uv_band_counts = band_counts[b->uvtx];
+    int bytesperpixel = is8bitsperpixel ? 1 : 2;
+    int total_coeff = 0;
+
+// collapse 'step' per-4x4 context bytes into one boolean before decoding
+// at a larger transform size
+#define MERGE(la, end, step, rd) \
+    for (n = 0; n < end; n += step) \
+        la[n] = !!rd(&la[n])
+#define MERGE_CTX(step, rd) \
+    do { \
+        MERGE(l, end_y, step, rd); \
+        MERGE(a, end_x, step, rd); \
+    } while (0)
+
+// decode one luma transform block per (x, y) step; EOBs > 255 need a
+// 16-bit store, hence AV_WN16A for the larger transforms
+#define DECODE_Y_COEF_LOOP(step, mode_index, v) \
+    for (n = 0, y = 0; y < end_y; y += step) { \
+        for (x = 0; x < end_x; x += step, n += step * step) { \
+            enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
+            res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
+                  (s, s->block + 16 * n * bytesperpixel, 16 * step * step, \
+                   c, e, p, a[x] + l[y], yscans[txtp], \
+                   ynbs[txtp], y_band_counts, qmul[0]); \
+            a[x] = l[y] = !!res; \
+            total_coeff |= !!res; \
+            if (step >= 4) { \
+                AV_WN16A(&s->eob[n], res); \
+            } else { \
+                s->eob[n] = res; \
+            } \
+        } \
+    }
+
+// expand the single decoded context byte back over 'step' 4x4 positions;
+// 'cond' (block not clipped by the frame edge) allows full aligned stores,
+// otherwise only bytes inside the clipped area are written
+#define SPLAT(la, end, step, cond) \
+    if (step == 2) { \
+        for (n = 1; n < end; n += step) \
+            la[n] = la[n - 1]; \
+    } else if (step == 4) { \
+        if (cond) { \
+            for (n = 0; n < end; n += step) \
+                AV_WN32A(&la[n], la[n] * 0x01010101); \
+        } else { \
+            for (n = 0; n < end; n += step) \
+                memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
+        } \
+    } else /* step == 8 */ { \
+        if (cond) { \
+            if (HAVE_FAST_64BIT) { \
+                for (n = 0; n < end; n += step) \
+                    AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
+            } else { \
+                for (n = 0; n < end; n += step) { \
+                    uint32_t v32 = la[n] * 0x01010101; \
+                    AV_WN32A(&la[n], v32); \
+                    AV_WN32A(&la[n + 4], v32); \
+                } \
+            } \
+        } else { \
+            for (n = 0; n < end; n += step) \
+                memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
+        } \
+    }
+#define SPLAT_CTX(step) \
+    do { \
+        SPLAT(a, end_x, step, end_x == w4); \
+        SPLAT(l, end_y, step, end_y == h4); \
+    } while (0)
+
+    /* y tokens */
+    switch (b->tx) {
+    case TX_4X4:
+        DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
+        break;
+    case TX_8X8:
+        MERGE_CTX(2, AV_RN16A);
+        DECODE_Y_COEF_LOOP(2, 0,);
+        SPLAT_CTX(2);
+        break;
+    case TX_16X16:
+        MERGE_CTX(4, AV_RN32A);
+        DECODE_Y_COEF_LOOP(4, 0,);
+        SPLAT_CTX(4);
+        break;
+    case TX_32X32:
+        MERGE_CTX(8, AV_RN64A);
+        DECODE_Y_COEF_LOOP(8, 0, 32);
+        SPLAT_CTX(8);
+        break;
+    }
+
+// same as DECODE_Y_COEF_LOOP but for chroma plane 'pl' (always DCT_DCT scan)
+#define DECODE_UV_COEF_LOOP(step, v) \
+    for (n = 0, y = 0; y < end_y; y += step) { \
+        for (x = 0; x < end_x; x += step, n += step * step) { \
+            res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
+                  (s, s->uvblock[pl] + 16 * n * bytesperpixel, \
+                   16 * step * step, c, e, p, a[x] + l[y], \
+                   uvscan, uvnb, uv_band_counts, qmul[1]); \
+            a[x] = l[y] = !!res; \
+            total_coeff |= !!res; \
+            if (step >= 4) { \
+                AV_WN16A(&s->uveob[pl][n], res); \
+            } else { \
+                s->uveob[pl][n] = res; \
+            } \
+        } \
+    }
+
+    p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
+    c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
+    e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
+    // chroma extents shrink with the subsampling factors
+    w4 >>= s->ss_h;
+    end_x >>= s->ss_h;
+    h4 >>= s->ss_v;
+    end_y >>= s->ss_v;
+    for (pl = 0; pl < 2; pl++) {
+        a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h];
+        l = &s->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
+        switch (b->uvtx) {
+        case TX_4X4:
+            DECODE_UV_COEF_LOOP(1,);
+            break;
+        case TX_8X8:
+            MERGE_CTX(2, AV_RN16A);
+            DECODE_UV_COEF_LOOP(2,);
+            SPLAT_CTX(2);
+            break;
+        case TX_16X16:
+            MERGE_CTX(4, AV_RN32A);
+            DECODE_UV_COEF_LOOP(4,);
+            SPLAT_CTX(4);
+            break;
+        case TX_32X32:
+            MERGE_CTX(8, AV_RN64A);
+            DECODE_UV_COEF_LOOP(8, 32);
+            SPLAT_CTX(8);
+            break;
+        }
+    }
+
+    return total_coeff;
+}
+
+// 8 bits-per-pixel entry point for decode_coeffs()
+static int decode_coeffs_8bpp(AVCodecContext *ctx)
+{
+    return decode_coeffs(ctx, 1);
+}
+
+// high bit depth entry point for decode_coeffs()
+static int decode_coeffs_16bpp(AVCodecContext *ctx)
+{
+    return decode_coeffs(ctx, 0);
+}
+
+/**
+ * Prepare the top (*a) and left (l) edge pixel arrays for intra prediction
+ * of one transform block, substituting fixed DC fill values where neighbour
+ * pixels are unavailable, and convert the prediction mode to the variant
+ * matching the available edges (e.g. DC_PRED -> LEFT_DC_PRED).
+ *
+ * @param a   in/out: pointer to the top edge buffer; may be redirected to
+ *            point straight into the reference row when usable as-is
+ * @param l   left edge buffer, filled bottom-up (or top-down for HOR_UP_PRED)
+ * @return the converted prediction mode to use for s->dsp.intra_pred[]
+ */
+static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
+                                             uint8_t *dst_edge, ptrdiff_t stride_edge,
+                                             uint8_t *dst_inner, ptrdiff_t stride_inner,
+                                             uint8_t *l, int col, int x, int w,
+                                             int row, int y, enum TxfmMode tx,
+                                             int p, int ss_h, int ss_v, int bytesperpixel)
+{
+    int have_top = row > 0 || y > 0;
+    int have_left = col > s->tile_col_start || x > 0;
+    int have_right = x < w - 1;
+    int bpp = s->bpp;
+    // replacement mode per (have_left, have_top) availability combination
+    static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
+        [VERT_PRED]            = { { DC_127_PRED,          VERT_PRED            },
+                                   { DC_127_PRED,          VERT_PRED            } },
+        [HOR_PRED]             = { { DC_129_PRED,          DC_129_PRED          },
+                                   { HOR_PRED,             HOR_PRED             } },
+        [DC_PRED]              = { { DC_128_PRED,          TOP_DC_PRED          },
+                                   { LEFT_DC_PRED,         DC_PRED              } },
+        [DIAG_DOWN_LEFT_PRED]  = { { DC_127_PRED,          DIAG_DOWN_LEFT_PRED  },
+                                   { DC_127_PRED,          DIAG_DOWN_LEFT_PRED  } },
+        [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
+                                   { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
+        [VERT_RIGHT_PRED]      = { { VERT_RIGHT_PRED,      VERT_RIGHT_PRED      },
+                                   { VERT_RIGHT_PRED,      VERT_RIGHT_PRED      } },
+        [HOR_DOWN_PRED]        = { { HOR_DOWN_PRED,        HOR_DOWN_PRED        },
+                                   { HOR_DOWN_PRED,        HOR_DOWN_PRED        } },
+        [VERT_LEFT_PRED]       = { { DC_127_PRED,          VERT_LEFT_PRED       },
+                                   { DC_127_PRED,          VERT_LEFT_PRED       } },
+        [HOR_UP_PRED]          = { { DC_129_PRED,          DC_129_PRED          },
+                                   { HOR_UP_PRED,          HOR_UP_PRED          } },
+        [TM_VP8_PRED]          = { { DC_129_PRED,          VERT_PRED            },
+                                   { HOR_PRED,             TM_VP8_PRED          } },
+    };
+    // which edge pixels each (converted) mode actually reads
+    static const struct {
+        uint8_t needs_left:1;
+        uint8_t needs_top:1;
+        uint8_t needs_topleft:1;
+        uint8_t needs_topright:1;
+        uint8_t invert_left:1;
+    } edges[N_INTRA_PRED_MODES] = {
+        [VERT_PRED]            = { .needs_top  = 1 },
+        [HOR_PRED]             = { .needs_left = 1 },
+        [DC_PRED]              = { .needs_top  = 1, .needs_left = 1 },
+        [DIAG_DOWN_LEFT_PRED]  = { .needs_top  = 1, .needs_topright = 1 },
+        [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
+        [VERT_RIGHT_PRED]      = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
+        [HOR_DOWN_PRED]        = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
+        [VERT_LEFT_PRED]       = { .needs_top  = 1, .needs_topright = 1 },
+        [HOR_UP_PRED]          = { .needs_left = 1, .invert_left = 1 },
+        [TM_VP8_PRED]          = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
+        [LEFT_DC_PRED]         = { .needs_left = 1 },
+        [TOP_DC_PRED]          = { .needs_top  = 1 },
+        [DC_128_PRED]          = { 0 },
+        [DC_127_PRED]          = { 0 },
+        [DC_129_PRED]          = { 0 }
+    };
+
+    av_assert2(mode >= 0 && mode < 10);
+    mode = mode_conv[mode][have_left][have_top];
+    if (edges[mode].needs_top) {
+        uint8_t *top, *topleft;
+        int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
+        int n_px_need_tr = 0;
+
+        if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
+            n_px_need_tr = 4;
+
+        // if top of sb64-row, use s->intra_pred_data[] instead of
+        // dst[-stride] for intra prediction (it contains pre- instead of
+        // post-loopfilter data)
+        if (have_top) {
+            top = !(row & 7) && !y ?
+                s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
+                y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
+            if (have_left)
+                topleft = !(row & 7) && !y ?
+                    s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
+                    y == 0 || x == 0 ? &dst_edge[-stride_edge] :
+                    &dst_inner[-stride_inner];
+        }
+
+        // fast path: the reference row can be used directly, no copying
+        if (have_top &&
+            (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
+            (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
+            n_px_need + n_px_need_tr <= n_px_have) {
+            *a = top;
+        } else {
+            if (have_top) {
+                if (n_px_need <= n_px_have) {
+                    memcpy(*a, top, n_px_need * bytesperpixel);
+                } else {
+// bpp-generic "memset": replicate pixel (v)[(i2)] into (c)[(i1)..]
+#define memset_bpp(c, i1, v, i2, num) do { \
+    if (bytesperpixel == 1) { \
+        memset(&(c)[(i1)], (v)[(i2)], (num)); \
+    } else { \
+        int n, val = AV_RN16A(&(v)[(i2) * 2]); \
+        for (n = 0; n < (num); n++) { \
+            AV_WN16A(&(c)[((i1) + n) * 2], val); \
+        } \
+    } \
+} while (0)
+                    memcpy(*a, top, n_px_have * bytesperpixel);
+                    // extend with the last available pixel
+                    memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have);
+                }
+            } else {
+// bpp-generic "memset" with an immediate pixel value
+#define memset_val(c, val, num) do { \
+    if (bytesperpixel == 1) { \
+        memset((c), (val), (num)); \
+    } else { \
+        int n; \
+        for (n = 0; n < (num); n++) { \
+            AV_WN16A(&(c)[n * 2], (val)); \
+        } \
+    } \
+} while (0)
+                memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need);
+            }
+            if (edges[mode].needs_topleft) {
+                if (have_left && have_top) {
+// bpp-generic single-pixel assignment (c)[(i1)] = (v)[(i2)]
+#define assign_bpp(c, i1, v, i2) do { \
+    if (bytesperpixel == 1) { \
+        (c)[(i1)] = (v)[(i2)]; \
+    } else { \
+        AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \
+    } \
+} while (0)
+                    assign_bpp(*a, -1, topleft, -1);
+                } else {
+// bpp-generic single-pixel assignment with an immediate value
+#define assign_val(c, i, v) do { \
+    if (bytesperpixel == 1) { \
+        (c)[(i)] = (v); \
+    } else { \
+        AV_WN16A(&(c)[(i) * 2], (v)); \
+    } \
+} while (0)
+                    assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1));
+                }
+            }
+            if (tx == TX_4X4 && edges[mode].needs_topright) {
+                if (have_top && have_right &&
+                    n_px_need + n_px_need_tr <= n_px_have) {
+                    memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel);
+                } else {
+                    // no top-right: replicate the rightmost top pixel
+                    memset_bpp(*a, 4, *a, 3, 4);
+                }
+            }
+        }
+    }
+    if (edges[mode].needs_left) {
+        if (have_left) {
+            int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
+            uint8_t *dst = x == 0 ? dst_edge : dst_inner;
+            ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
+
+            if (edges[mode].invert_left) {
+                if (n_px_need <= n_px_have) {
+                    for (i = 0; i < n_px_need; i++)
+                        assign_bpp(l, i, &dst[i * stride], -1);
+                } else {
+                    for (i = 0; i < n_px_have; i++)
+                        assign_bpp(l, i, &dst[i * stride], -1);
+                    memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have);
+                }
+            } else {
+                if (n_px_need <= n_px_have) {
+                    for (i = 0; i < n_px_need; i++)
+                        assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
+                } else {
+                    for (i = 0; i < n_px_have; i++)
+                        assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
+                    memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have);
+                }
+            }
+        } else {
+            memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx);
+        }
+    }
+
+    return mode;
+}
+
+/**
+ * Intra reconstruction of the current block: for every transform block,
+ * fix up the prediction edges (check_intra_mode), run the intra predictor,
+ * and add the inverse transform when the block has coefficients (eob != 0).
+ * Processes luma first, then both chroma planes.
+ */
+static av_always_inline void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off,
+                                         ptrdiff_t uv_off, int bytesperpixel)
+{
+    VP9Context *s = ctx->priv_data;
+    VP9Block *b = s->b;
+    int row = s->row, col = s->col;
+    int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
+    int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
+    int end_x = FFMIN(2 * (s->cols - col), w4);
+    int end_y = FFMIN(2 * (s->rows - row), h4);
+    int tx = 4 * s->s.h.lossless + b->tx, uvtx = b->uvtx + 4 * s->s.h.lossless;
+    int uvstep1d = 1 << b->uvtx, p;
+    uint8_t *dst = s->dst[0], *dst_r = s->s.frames[CUR_FRAME].tf.f->data[0] + y_off;
+    LOCAL_ALIGNED_32(uint8_t, a_buf, [96]);
+    LOCAL_ALIGNED_32(uint8_t, l, [64]);
+
+    for (n = 0, y = 0; y < end_y; y += step1d) {
+        uint8_t *ptr = dst, *ptr_r = dst_r;
+        for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel,
+                               ptr_r += 4 * step1d * bytesperpixel, n += step) {
+            // sub-8x8 blocks with 4x4 transforms carry one mode per 4x4 unit
+            int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
+                               y * 2 + x : 0];
+            uint8_t *a = &a_buf[32];
+            enum TxfmType txtp = vp9_intra_txfm_type[mode];
+            int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
+
+            mode = check_intra_mode(s, mode, &a, ptr_r,
+                                    s->s.frames[CUR_FRAME].tf.f->linesize[0],
+                                    ptr, s->y_stride, l,
+                                    col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel);
+            s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
+            if (eob)
+                s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
+                                           s->block + 16 * n * bytesperpixel, eob);
+        }
+        dst_r += 4 * step1d * s->s.frames[CUR_FRAME].tf.f->linesize[0];
+        dst += 4 * step1d * s->y_stride;
+    }
+
+    // U/V
+    w4 >>= s->ss_h;
+    end_x >>= s->ss_h;
+    end_y >>= s->ss_v;
+    step = 1 << (b->uvtx * 2);
+    for (p = 0; p < 2; p++) {
+        dst = s->dst[1 + p];
+        dst_r = s->s.frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
+        for (n = 0, y = 0; y < end_y; y += uvstep1d) {
+            uint8_t *ptr = dst, *ptr_r = dst_r;
+            for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel,
+                                   ptr_r += 4 * uvstep1d * bytesperpixel, n += step) {
+                int mode = b->uvmode;
+                uint8_t *a = &a_buf[32];
+                int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
+
+                mode = check_intra_mode(s, mode, &a, ptr_r,
+                                        s->s.frames[CUR_FRAME].tf.f->linesize[1],
+                                        ptr, s->uv_stride, l, col, x, w4, row, y,
+                                        b->uvtx, p + 1, s->ss_h, s->ss_v, bytesperpixel);
+                s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
+                if (eob)
+                    s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
+                                                    s->uvblock[p] + 16 * n * bytesperpixel, eob);
+            }
+            dst_r += 4 * uvstep1d * s->s.frames[CUR_FRAME].tf.f->linesize[1];
+            dst += 4 * uvstep1d * s->uv_stride;
+        }
+    }
+}
+
+// 8 bits-per-pixel entry point for intra_recon()
+static void intra_recon_8bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
+{
+    intra_recon(ctx, y_off, uv_off, 1);
+}
+
+// high bit depth (2 bytes per pixel) entry point for intra_recon()
+static void intra_recon_16bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
+{
+    intra_recon(ctx, y_off, uv_off, 2);
+}
+
+/**
+ * Unscaled luma motion compensation (mv in 1/8-pel units). Waits until the
+ * needed rows of the reference frame are decoded (frame threading), then
+ * routes through the edge-emulation buffer whenever the subpel filter
+ * footprint would read outside the reference frame.
+ */
+static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
+                                              uint8_t *dst, ptrdiff_t dst_stride,
+                                              const uint8_t *ref, ptrdiff_t ref_stride,
+                                              ThreadFrame *ref_frame,
+                                              ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
+                                              int bw, int bh, int w, int h, int bytesperpixel)
+{
+    int mx = mv->x, my = mv->y, th;
+
+    y += my >> 3;
+    x += mx >> 3;
+    ref += y * ref_stride + x * bytesperpixel;
+    mx &= 7;
+    my &= 7;
+    // FIXME bilinear filter only needs 0/1 pixels, not 3/4
+    // we use +7 because the last 7 pixels of each sbrow can be changed in
+    // the longest loopfilter of the next sbrow
+    th = (y + bh + 4 * !!my + 7) >> 6;
+    ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
+    // subpel filtering reads 3 pixels before / 4 after; emulate edges if
+    // that footprint leaves the frame
+    if (x < !!mx * 3 || y < !!my * 3 ||
+        x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
+        s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
+                                 ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
+                                 160, ref_stride,
+                                 bw + !!mx * 7, bh + !!my * 7,
+                                 x - !!mx * 3, y - !!my * 3, w, h);
+        ref = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
+        ref_stride = 160;
+    }
+    mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
+}
+
+/**
+ * Unscaled chroma motion compensation for both U and V at once. The luma mv
+ * is doubled along any non-subsampled axis, giving 1/16-pel chroma units.
+ * Same frame-thread wait and edge-emulation handling as mc_luma_unscaled().
+ */
+static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
+                                                uint8_t *dst_u, uint8_t *dst_v,
+                                                ptrdiff_t dst_stride,
+                                                const uint8_t *ref_u, ptrdiff_t src_stride_u,
+                                                const uint8_t *ref_v, ptrdiff_t src_stride_v,
+                                                ThreadFrame *ref_frame,
+                                                ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
+                                                int bw, int bh, int w, int h, int bytesperpixel)
+{
+    int mx = mv->x * (1 << !s->ss_h), my = mv->y * (1 << !s->ss_v), th;
+
+    y += my >> 4;
+    x += mx >> 4;
+    ref_u += y * src_stride_u + x * bytesperpixel;
+    ref_v += y * src_stride_v + x * bytesperpixel;
+    mx &= 15;
+    my &= 15;
+    // FIXME bilinear filter only needs 0/1 pixels, not 3/4
+    // we use +7 because the last 7 pixels of each sbrow can be changed in
+    // the longest loopfilter of the next sbrow
+    th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
+    ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
+    if (x < !!mx * 3 || y < !!my * 3 ||
+        x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
+        // both planes share the emu buffer, so U is filtered before V's
+        // edge pixels are emulated into it
+        s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
+                                 ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
+                                 160, src_stride_u,
+                                 bw + !!mx * 7, bh + !!my * 7,
+                                 x - !!mx * 3, y - !!my * 3, w, h);
+        ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
+        mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
+
+        s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
+                                 ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
+                                 160, src_stride_v,
+                                 bw + !!mx * 7, bh + !!my * 7,
+                                 x - !!mx * 3, y - !!my * 3, w, h);
+        ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
+        mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
+    } else {
+        mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
+        mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
+    }
+}
+
+// Map the template's mc_luma_dir()/mc_chroma_dir() calls onto the unscaled
+// MC helpers above (the px/py/pw/ph/i arguments are only needed for scaled
+// references and are dropped here), then instantiate vp9_mc_template.c once
+// per pixel size (8bpp / 16bpp) with SCALED=0.
+#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
+                    px, py, pw, ph, bw, bh, w, h, i) \
+    mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
+                     mv, bw, bh, w, h, bytesperpixel)
+#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
+                      row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
+    mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
+                       row, col, mv, bw, bh, w, h, bytesperpixel)
+#define SCALED 0
+#define FN(x) x##_8bpp
+#define BYTES_PER_PIXEL 1
+#include "vp9_mc_template.c"
+#undef FN
+#undef BYTES_PER_PIXEL
+#define FN(x) x##_16bpp
+#define BYTES_PER_PIXEL 2
+#include "vp9_mc_template.c"
+#undef mc_luma_dir
+#undef mc_chroma_dir
+#undef FN
+#undef BYTES_PER_PIXEL
+#undef SCALED
+
+/**
+ * Luma motion compensation against a reference frame of a different size.
+ * Falls back to mc_luma_unscaled() when the dimensions actually match;
+ * otherwise scales the mv per component (reproducing libvpx rounding) and
+ * uses the scaled-MC function smc with per-axis step increments.
+ * Note: defines scale_mv(), which mc_chroma_scaled() below also uses and
+ * #undef's.
+ */
+static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
+                                            vp9_mc_func (*mc)[2],
+                                            uint8_t *dst, ptrdiff_t dst_stride,
+                                            const uint8_t *ref, ptrdiff_t ref_stride,
+                                            ThreadFrame *ref_frame,
+                                            ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
+                                            int px, int py, int pw, int ph,
+                                            int bw, int bh, int w, int h, int bytesperpixel,
+                                            const uint16_t *scale, const uint8_t *step)
+{
+    if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
+        s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
+        mc_luma_unscaled(s, mc, dst, dst_stride, ref, ref_stride, ref_frame,
+                         y, x, in_mv, bw, bh, w, h, bytesperpixel);
+    } else {
+#define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
+        int mx, my;
+        int refbw_m1, refbh_m1;
+        int th;
+        VP56mv mv;
+
+        // clamp the mv so the scaled fetch stays near the visible area
+        mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 8, (s->cols * 8 - x + px + 3) * 8);
+        mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 8, (s->rows * 8 - y + py + 3) * 8);
+        // BUG libvpx seems to scale the two components separately. This introduces
+        // rounding errors but we have to reproduce them to be exactly compatible
+        // with the output from libvpx...
+        mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
+        my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
+
+        y = my >> 4;
+        x = mx >> 4;
+        ref += y * ref_stride + x * bytesperpixel;
+        mx &= 15;
+        my &= 15;
+        refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
+        refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
+        // FIXME bilinear filter only needs 0/1 pixels, not 3/4
+        // we use +7 because the last 7 pixels of each sbrow can be changed in
+        // the longest loopfilter of the next sbrow
+        th = (y + refbh_m1 + 4 + 7) >> 6;
+        ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
+        if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
+            s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
+                                     ref - 3 * ref_stride - 3 * bytesperpixel,
+                                     288, ref_stride,
+                                     refbw_m1 + 8, refbh_m1 + 8,
+                                     x - 3, y - 3, w, h);
+            ref = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
+            ref_stride = 288;
+        }
+        smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
+    }
+}
+
/**
 * Motion-compensate one chroma (U+V) block when the reference frame may have
 * a different resolution than the current frame.
 *
 * Mirrors mc_luma_scaled(), but handles both chroma planes and the
 * horizontal/vertical subsampling flags (s->ss_h / s->ss_v), including a
 * known libvpx clamping bug that must be reproduced for bit-exact output.
 */
static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
                                              vp9_mc_func (*mc)[2],
                                              uint8_t *dst_u, uint8_t *dst_v,
                                              ptrdiff_t dst_stride,
                                              const uint8_t *ref_u, ptrdiff_t src_stride_u,
                                              const uint8_t *ref_v, ptrdiff_t src_stride_v,
                                              ThreadFrame *ref_frame,
                                              ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
                                              int px, int py, int pw, int ph,
                                              int bw, int bh, int w, int h, int bytesperpixel,
                                              const uint16_t *scale, const uint8_t *step)
{
    if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
        s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
        // same resolution: no scaling needed, use the plain MC path
        mc_chroma_unscaled(s, mc, dst_u, dst_v, dst_stride, ref_u, src_stride_u,
                           ref_v, src_stride_v, ref_frame,
                           y, x, in_mv, bw, bh, w, h, bytesperpixel);
    } else {
        int mx, my;
        int refbw_m1, refbh_m1;
        int th;
        VP56mv mv;

        if (s->ss_h) {
            // BUG https://code.google.com/p/webm/issues/detail?id=820
            mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 16, (s->cols * 4 - x + px + 3) * 16);
            mx = scale_mv(mv.x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
        } else {
            mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 8, (s->cols * 8 - x + px + 3) * 8);
            mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
        }
        if (s->ss_v) {
            // BUG https://code.google.com/p/webm/issues/detail?id=820
            mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 16, (s->rows * 4 - y + py + 3) * 16);
            my = scale_mv(mv.y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
        } else {
            mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 8, (s->rows * 8 - y + py + 3) * 8);
            my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
        }
#undef scale_mv
        y = my >> 4;
        x = mx >> 4;
        ref_u += y * src_stride_u + x * bytesperpixel;
        ref_v += y * src_stride_v + x * bytesperpixel;
        mx &= 15;
        my &= 15;
        // last source row/column (relative) touched by the scaled filter
        refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
        refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
        // FIXME bilinear filter only needs 0/1 pixels, not 3/4
        // we use +7 because the last 7 pixels of each sbrow can be changed in
        // the longest loopfilter of the next sbrow
        th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
        ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
        if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
            // edge emulation; U and V share the single emu buffer, so each
            // plane is copied and filtered in turn
            s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
                                     ref_u - 3 * src_stride_u - 3 * bytesperpixel,
                                     288, src_stride_u,
                                     refbw_m1 + 8, refbh_m1 + 8,
                                     x - 3, y - 3, w, h);
            ref_u = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
            smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]);

            s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
                                     ref_v - 3 * src_stride_v - 3 * bytesperpixel,
                                     288, src_stride_v,
                                     refbw_m1 + 8, refbh_m1 + 8,
                                     x - 3, y - 3, w, h);
            ref_v = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
            smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);
        } else {
            smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
            smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
        }
    }
}
+
// Glue macros routing the shared inter-prediction template
// (vp9_mc_template.c) to the *scaled* MC helpers above; the template is
// instantiated twice, once for 8bpp and once for 16bpp. SCALED selects the
// scaled code paths inside the template. Order of define/include/undef is
// significant.
#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
                    px, py, pw, ph, bw, bh, w, h, i) \
    mc_luma_scaled(s, s->dsp.s##mc, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
                   mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
                   s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
                      row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
    mc_chroma_scaled(s, s->dsp.s##mc, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
                     row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
                     s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
#define SCALED 1
#define FN(x) x##_scaled_8bpp
#define BYTES_PER_PIXEL 1
#include "vp9_mc_template.c"
#undef FN
#undef BYTES_PER_PIXEL
#define FN(x) x##_scaled_16bpp
#define BYTES_PER_PIXEL 2
#include "vp9_mc_template.c"
#undef mc_luma_dir
#undef mc_chroma_dir
#undef FN
#undef BYTES_PER_PIXEL
#undef SCALED
+
/**
 * Reconstruct an inter-coded block: run inter prediction (scaled or
 * unscaled, 8 or 16 bpp), then, unless the block is skipped, add the
 * inverse-transformed residuals for luma and both chroma planes.
 *
 * 'bytesperpixel' is a compile-time-style constant (1 or 2) so the
 * always-inline expansion specializes the code per bit depth.
 */
static av_always_inline void inter_recon(AVCodecContext *ctx, int bytesperpixel)
{
    VP9Context *s = ctx->priv_data;
    VP9Block *b = s->b;
    int row = s->row, col = s->col;

    // a nonzero mvscale for any used reference means ref and cur frame
    // differ in resolution, so take the scaled prediction path
    if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
        if (bytesperpixel == 1) {
            inter_pred_scaled_8bpp(ctx);
        } else {
            inter_pred_scaled_16bpp(ctx);
        }
    } else {
        if (bytesperpixel == 1) {
            inter_pred_8bpp(ctx);
        } else {
            inter_pred_16bpp(ctx);
        }
    }
    if (!b->skip) {
        /* mostly copied intra_recon() */

        int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
        int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
        int end_x = FFMIN(2 * (s->cols - col), w4);
        int end_y = FFMIN(2 * (s->rows - row), h4);
        int tx = 4 * s->s.h.lossless + b->tx, uvtx = b->uvtx + 4 * s->s.h.lossless;
        int uvstep1d = 1 << b->uvtx, p;
        uint8_t *dst = s->dst[0];

        // y itxfm add
        for (n = 0, y = 0; y < end_y; y += step1d) {
            uint8_t *ptr = dst;
            for (x = 0; x < end_x; x += step1d,
                 ptr += 4 * step1d * bytesperpixel, n += step) {
                // for >8x8 transforms the eob is stored as a 16-bit value
                int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];

                if (eob)
                    s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
                                                  s->block + 16 * n * bytesperpixel, eob);
            }
            dst += 4 * s->y_stride * step1d;
        }

        // uv itxfm add
        end_x >>= s->ss_h;
        end_y >>= s->ss_v;
        step = 1 << (b->uvtx * 2);
        for (p = 0; p < 2; p++) {
            dst = s->dst[p + 1];
            for (n = 0, y = 0; y < end_y; y += uvstep1d) {
                uint8_t *ptr = dst;
                for (x = 0; x < end_x; x += uvstep1d,
                     ptr += 4 * uvstep1d * bytesperpixel, n += step) {
                    int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];

                    if (eob)
                        s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
                                                        s->uvblock[p] + 16 * n * bytesperpixel, eob);
                }
                dst += 4 * uvstep1d * s->uv_stride;
            }
        }
    }
}
+
// 8-bit-depth entry point for inter_recon(); the constant argument lets the
// always-inline body specialize for bytesperpixel == 1.
static void inter_recon_8bpp(AVCodecContext *ctx)
{
    inter_recon(ctx, 1);
}
+
// High-bit-depth entry point for inter_recon(); specializes for
// bytesperpixel == 2.
static void inter_recon_16bpp(AVCodecContext *ctx)
{
    inter_recon(ctx, 2);
}
+
/**
 * Record which block edges need loop filtering for one coded block.
 *
 * Sets bits in 'mask' ([0]=vertical/column edges, [1]=horizontal/row edges;
 * second index = row within the 64x64 superblock; third index selects the
 * filter width class). Coordinates and sizes are in 8-pixel units, relative
 * to the superblock (row_and_7/col_and_7). ss_h/ss_v are the chroma
 * subsampling flags; col_end/row_end flag odd right/bottom frame edges.
 */
static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
                                        int row_and_7, int col_and_7,
                                        int w, int h, int col_end, int row_end,
                                        enum TxfmMode tx, int skip_inter)
{
    static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
    static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };

    // FIXME I'm pretty sure all loops can be replaced by a single LUT if
    // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
    // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
    // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)

    // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
    // edges. This means that for UV, we work on two subsampled blocks at
    // a time, and we only use the topleft block's mode information to set
    // things like block strength. Thus, for any block size smaller than
    // 16x16, ignore the odd portion of the block.
    if (tx == TX_4X4 && (ss_v | ss_h)) {
        if (h == ss_v) {
            if (row_and_7 & 1)
                return;
            if (!row_end)
                h += 1;
        }
        if (w == ss_h) {
            if (col_and_7 & 1)
                return;
            if (!col_end)
                w += 1;
        }
    }

    if (tx == TX_4X4 && !skip_inter) {
        int t = 1 << col_and_7, m_col = (t << w) - t, y;
        // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
        int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;

        for (y = row_and_7; y < h + row_and_7; y++) {
            int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);

            mask[0][y][1] |= m_row_8;
            mask[0][y][2] |= m_row_4;
            // for odd lines, if the odd col is not being filtered,
            // skip odd row also:
            // .---. <-- a
            // |   |
            // |___| <-- b
            // ^   ^
            // c   d
            //
            // if a/c are even row/col and b/d are odd, and d is skipped,
            // e.g. right edge of size-66x66.webm, then skip b also (bug)
            if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
                mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
            } else {
                mask[1][y][col_mask_id] |= m_col;
            }
            if (!ss_h)
                mask[0][y][3] |= m_col;
            if (!ss_v) {
                if (ss_h && (col_end & 1))
                    mask[1][y][3] |= (t << (w - 1)) - t;
                else
                    mask[1][y][3] |= m_col;
            }
        }
    } else {
        int y, t = 1 << col_and_7, m_col = (t << w) - t;

        if (!skip_inter) {
            int mask_id = (tx == TX_8X8);
            static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
            int l2 = tx + ss_h - 1, step1d;
            int m_row = m_col & masks[l2];

            // at odd UV col/row edges tx16/tx32 loopfilter edges, force
            // 8wd loopfilter to prevent going off the visible edge.
            if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
                int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
                int m_row_8 = m_row - m_row_16;

                for (y = row_and_7; y < h + row_and_7; y++) {
                    mask[0][y][0] |= m_row_16;
                    mask[0][y][1] |= m_row_8;
                }
            } else {
                for (y = row_and_7; y < h + row_and_7; y++)
                    mask[0][y][mask_id] |= m_row;
            }

            l2 = tx + ss_v - 1;
            step1d = 1 << l2;
            if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
                for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
                    mask[1][y][0] |= m_col;
                if (y - row_and_7 == h - 1)
                    mask[1][y][1] |= m_col;
            } else {
                for (y = row_and_7; y < h + row_and_7; y += step1d)
                    mask[1][y][mask_id] |= m_col;
            }
        } else if (tx != TX_4X4) {
            int mask_id;

            // skipped inter blocks: only the block's outer edges filter
            mask_id = (tx == TX_8X8) || (h == ss_v);
            mask[1][row_and_7][mask_id] |= m_col;
            mask_id = (tx == TX_8X8) || (w == ss_h);
            for (y = row_and_7; y < h + row_and_7; y++)
                mask[0][y][mask_id] |= t;
        } else {
            int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;

            for (y = row_and_7; y < h + row_and_7; y++) {
                mask[0][y][2] |= t4;
                mask[0][y][1] |= t8;
            }
            mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
        }
    }
}
+
/**
 * Decode and reconstruct one block.
 *
 * In pass < 2 this parses mode info and coefficients; in the reconstruction
 * passes it runs intra/inter prediction + residual add (via temporary
 * buffers when the block overhangs the frame edge), then computes the loop
 * filter level and edge masks for the block. Advances the per-block output
 * pointers in the multi-pass (frame-threaded) modes.
 */
static void decode_b(AVCodecContext *ctx, int row, int col,
                     struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
                     enum BlockLevel bl, enum BlockPartition bp)
{
    VP9Context *s = ctx->priv_data;
    VP9Block *b = s->b;
    enum BlockSize bs = bl * 3 + bp;
    int bytesperpixel = s->bytesperpixel;
    int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
    int emu[2];
    AVFrame *f = s->s.frames[CUR_FRAME].tf.f;

    s->row = row;
    s->row7 = row & 7;
    s->col = col;
    s->col7 = col & 7;
    // MV bounds for this block position (1/8-pel units)
    s->min_mv.x = -(128 + col * 64);
    s->min_mv.y = -(128 + row * 64);
    s->max_mv.x = 128 + (s->cols - col - w4) * 64;
    s->max_mv.y = 128 + (s->rows - row - h4) * 64;
    if (s->pass < 2) {
        b->bs = bs;
        b->bl = bl;
        b->bp = bp;
        decode_mode(ctx);
        b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
                           (s->ss_v && h4 * 2 == (1 << b->tx)));

        if (!b->skip) {
            int has_coeffs;

            if (bytesperpixel == 1) {
                has_coeffs = decode_coeffs_8bpp(ctx);
            } else {
                has_coeffs = decode_coeffs_16bpp(ctx);
            }
            // small inter block with no coded coefficients: treat as skip
            // so the loop filter / context behaves accordingly
            if (!has_coeffs && b->bs <= BS_8x8 && !b->intra) {
                b->skip = 1;
                memset(&s->above_skip_ctx[col], 1, w4);
                memset(&s->left_skip_ctx[s->row7], 1, h4);
            }
        } else {
            int row7 = s->row7;

// zero n bytes of a nonzero-coefficient context variable
#define SPLAT_ZERO_CTX(v, n) \
    switch (n) { \
    case 1:  v = 0;          break; \
    case 2:  AV_ZERO16(&v);  break; \
    case 4:  AV_ZERO32(&v);  break; \
    case 8:  AV_ZERO64(&v);  break; \
    case 16: AV_ZERO128(&v); break; \
    }
// zero the y and (subsampling-adjusted) uv context along one direction
#define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
    do { \
        SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
        if (s->ss_##dir2) { \
            SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
            SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
        } else { \
            SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off * 2], n * 2); \
            SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off * 2], n * 2); \
        } \
    } while (0)

        switch (w4) {
        case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1, h); break;
        case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2, h); break;
        case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4, h); break;
        case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8, h); break;
        }
        switch (h4) {
        case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1, v); break;
        case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2, v); break;
        case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4, v); break;
        case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8, v); break;
        }
        }

        if (s->pass == 1) {
            // parse-only pass: advance per-block buffers and stop here
            s->b++;
            s->block += w4 * h4 * 64 * bytesperpixel;
            s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
            s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
            s->eob += 4 * w4 * h4;
            s->uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
            s->uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);

            return;
        }
    }

    // emulated overhangs if the stride of the target buffer can't hold. This
    // makes it possible to support emu-edge and so on even if we have large block
    // overhangs
    emu[0] = (col + w4) * 8 * bytesperpixel > f->linesize[0] ||
             (row + h4) > s->rows;
    emu[1] = ((col + w4) * 8 >> s->ss_h) * bytesperpixel > f->linesize[1] ||
             (row + h4) > s->rows;
    if (emu[0]) {
        s->dst[0] = s->tmp_y;
        s->y_stride = 128;
    } else {
        s->dst[0] = f->data[0] + yoff;
        s->y_stride = f->linesize[0];
    }
    if (emu[1]) {
        s->dst[1] = s->tmp_uv[0];
        s->dst[2] = s->tmp_uv[1];
        s->uv_stride = 128;
    } else {
        s->dst[1] = f->data[1] + uvoff;
        s->dst[2] = f->data[2] + uvoff;
        s->uv_stride = f->linesize[1];
    }
    if (b->intra) {
        if (s->bpp > 8) {
            intra_recon_16bpp(ctx, yoff, uvoff);
        } else {
            intra_recon_8bpp(ctx, yoff, uvoff);
        }
    } else {
        if (s->bpp > 8) {
            inter_recon_16bpp(ctx);
        } else {
            inter_recon_8bpp(ctx);
        }
    }
    if (emu[0]) {
        // copy the visible part of the temporary luma buffer back into the
        // frame, in power-of-two-wide strips
        int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;

        for (n = 0; o < w; n++) {
            int bw = 64 >> n;

            av_assert2(n <= 4);
            if (w & bw) {
                s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o * bytesperpixel, f->linesize[0],
                                         s->tmp_y + o * bytesperpixel, 128, h, 0, 0);
                o += bw;
            }
        }
    }
    if (emu[1]) {
        // same copy-back for the two temporary chroma buffers
        int w = FFMIN(s->cols - col, w4) * 8 >> s->ss_h;
        int h = FFMIN(s->rows - row, h4) * 8 >> s->ss_v, n, o = 0;

        for (n = s->ss_h; o < w; n++) {
            int bw = 64 >> n;

            av_assert2(n <= 4);
            if (w & bw) {
                s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o * bytesperpixel, f->linesize[1],
                                         s->tmp_uv[0] + o * bytesperpixel, 128, h, 0, 0);
                s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o * bytesperpixel, f->linesize[2],
                                         s->tmp_uv[1] + o * bytesperpixel, 128, h, 0, 0);
                o += bw;
            }
        }
    }

    // pick filter level and find edges to apply filter to
    if (s->s.h.filter.level &&
        (lvl = s->s.h.segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
                                                        [b->mode[3] != ZEROMV]) > 0) {
        int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
        int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;

        setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
        mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
        if (s->ss_h || s->ss_v)
            mask_edges(lflvl->mask[1], s->ss_h, s->ss_v, row7, col7, x_end, y_end,
                       s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
                       s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
                       b->uvtx, skip_inter);

        // lazily fill the limit LUTs for this filter level
        if (!s->filter_lut.lim_lut[lvl]) {
            int sharp = s->s.h.filter.sharpness;
            int limit = lvl;

            if (sharp > 0) {
                limit >>= (sharp + 3) >> 2;
                limit = FFMIN(limit, 9 - sharp);
            }
            limit = FFMAX(limit, 1);

            s->filter_lut.lim_lut[lvl] = limit;
            s->filter_lut.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
        }
    }

    if (s->pass == 2) {
        // reconstruction pass of 2-pass decoding: advance per-block buffers
        s->b++;
        s->block += w4 * h4 * 64 * bytesperpixel;
        s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
        s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
        s->eob += 4 * w4 * h4;
        s->uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
        s->uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
    }
}
+
/**
 * Parse the partition tree and decode one superblock (recursively).
 *
 * At each level the partition symbol is read from the range coder using a
 * context derived from the above/left partition contexts; blocks that would
 * extend past the frame edge restrict the choices (hence the branchy paths
 * below). Partition counts are accumulated for probability adaptation.
 */
static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
                      ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    VP9Context *s = ctx->priv_data;
    int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
            (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
    const uint8_t *p = s->s.h.keyframe || s->s.h.intraonly ? vp9_default_kf_partition_probs[bl][c] :
                                                             s->prob.p.partition[bl][c];
    enum BlockPartition bp;
    ptrdiff_t hbs = 4 >> bl;
    AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
    ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
    int bytesperpixel = s->bytesperpixel;

    if (bl == BL_8X8) {
        // smallest level: partition only selects the sub-8x8 block size
        bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
        decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
    } else if (col + hbs < s->cols) { // FIXME why not <=?
        if (row + hbs < s->rows) { // FIXME why not <=?
            bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
            switch (bp) {
            case PARTITION_NONE:
                decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_H:
                decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_V:
                decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
                yoff  += hbs * 8 * bytesperpixel;
                uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
                decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_SPLIT:
                decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb(ctx, row, col + hbs, lflvl,
                          yoff + 8 * hbs * bytesperpixel,
                          uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb(ctx, row + hbs, col + hbs, lflvl,
                          yoff + 8 * hbs * bytesperpixel,
                          uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                break;
            default:
                av_assert0(0);
            }
        } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
            // bottom half out of frame: only SPLIT or H possible
            bp = PARTITION_SPLIT;
            decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
            decode_sb(ctx, row, col + hbs, lflvl,
                      yoff + 8 * hbs * bytesperpixel,
                      uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
        } else {
            bp = PARTITION_H;
            decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
        }
    } else if (row + hbs < s->rows) { // FIXME why not <=?
        if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
            // right half out of frame: only SPLIT or V possible
            bp = PARTITION_SPLIT;
            decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
        } else {
            bp = PARTITION_V;
            decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
        }
    } else {
        // only the top-left quarter is inside the frame: forced SPLIT
        bp = PARTITION_SPLIT;
        decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
    }
    s->counts.partition[bl][c][bp]++;
}
+
/**
 * Re-walk one superblock from the block list recorded in pass 1 (2-pass
 * frame-threaded decoding): no bitstream reads here, the partition shape is
 * taken from the stored per-block bl/bp values.
 */
static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
                          ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    VP9Context *s = ctx->priv_data;
    VP9Block *b = s->b;
    ptrdiff_t hbs = 4 >> bl;
    AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
    ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
    int bytesperpixel = s->bytesperpixel;

    if (bl == BL_8X8) {
        av_assert2(b->bl == BL_8X8);
        decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
    } else if (s->b->bl == bl) {
        // block was coded at this level: one or two decode_b calls,
        // depending on the recorded partition
        decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
        if (b->bp == PARTITION_H && row + hbs < s->rows) {
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
        } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
            yoff  += hbs * 8 * bytesperpixel;
            uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
            decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
        }
    } else {
        // recorded block is smaller: recurse into the (in-frame) quadrants
        decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
        if (col + hbs < s->cols) { // FIXME why not <=?
            if (row + hbs < s->rows) {
                decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
                              uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
                              yoff + 8 * hbs * bytesperpixel,
                              uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
            } else {
                yoff  += hbs * 8 * bytesperpixel;
                uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
                decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
            }
        } else if (row + hbs < s->rows) {
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
        }
    }
}
+
/**
 * Apply the loop filter to vertical edges (between columns) of one plane of
 * a 64x64 superblock, driven by the edge masks built in mask_edges().
 *
 * mask[y][0..2] select 16/8/4-wide filters on 8px-aligned edges;
 * mask[y][3] marks interior 4px edges. Levels are read from 'lvl'
 * (one byte per 8x8 unit); adjacent edges with compatible filters are
 * merged into _16 or mix2 calls.
 */
static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h, int ss_v,
                                               uint8_t *lvl, uint8_t (*mask)[4],
                                               uint8_t *dst, ptrdiff_t ls)
{
    int y, x, bytesperpixel = s->bytesperpixel;

    // filter edges between columns (e.g. block1 | block2)
    for (y = 0; y < 8; y += 2 << ss_v, dst += 16 * ls, lvl += 16 << ss_v) {
        uint8_t *ptr = dst, *l = lvl, *hmask1 = mask[y], *hmask2 = mask[y + 1 + ss_v];
        unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
        unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
        unsigned hm = hm1 | hm2 | hm13 | hm23;

        for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8 * bytesperpixel >> ss_h) {
            if (col || x > 1) { // never filter the left frame border
                if (hm1 & x) {
                    int L = *l, H = L >> 4;
                    int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];

                    if (hmask1[0] & x) {
                        if (hmask2[0] & x) {
                            // two vertically adjacent 16-wide edges: one call
                            av_assert2(l[8 << ss_v] == L);
                            s->dsp.loop_filter_16[0](ptr, ls, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[2][0](ptr, ls, E, I, H);
                        }
                    } else if (hm2 & x) {
                        // pack the second edge's level into the high byte
                        L = l[8 << ss_v];
                        H |= (L >> 4) << 8;
                        E |= s->filter_lut.mblim_lut[L] << 8;
                        I |= s->filter_lut.lim_lut[L] << 8;
                        s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
                                               [!!(hmask2[1] & x)]
                                               [0](ptr, ls, E, I, H);
                    } else {
                        s->dsp.loop_filter_8[!!(hmask1[1] & x)]
                                            [0](ptr, ls, E, I, H);
                    }
                } else if (hm2 & x) {
                    int L = l[8 << ss_v], H = L >> 4;
                    int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];

                    s->dsp.loop_filter_8[!!(hmask2[1] & x)]
                                        [0](ptr + 8 * ls, ls, E, I, H);
                }
            }
            if (ss_h) {
                if (x & 0xAA)
                    l += 2;
            } else {
                // non-subsampled plane: also handle interior 4px edges
                if (hm13 & x) {
                    int L = *l, H = L >> 4;
                    int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];

                    if (hm23 & x) {
                        L = l[8 << ss_v];
                        H |= (L >> 4) << 8;
                        E |= s->filter_lut.mblim_lut[L] << 8;
                        I |= s->filter_lut.lim_lut[L] << 8;
                        s->dsp.loop_filter_mix2[0][0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
                    } else {
                        s->dsp.loop_filter_8[0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
                    }
                } else if (hm23 & x) {
                    int L = l[8 << ss_v], H = L >> 4;
                    int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];

                    s->dsp.loop_filter_8[0][0](ptr + 8 * ls + 4 * bytesperpixel, ls, E, I, H);
                }
                l++;
            }
        }
    }
}
+
/**
 * Apply the loop filter to horizontal edges (between rows) of one plane of
 * a 64x64 superblock; row counterpart of filter_plane_cols(), with the same
 * mask layout and level/limit packing conventions.
 */
static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h, int ss_v,
                                               uint8_t *lvl, uint8_t (*mask)[4],
                                               uint8_t *dst, ptrdiff_t ls)
{
    int y, x, bytesperpixel = s->bytesperpixel;

    //                                 block1
    // filter edges between rows (e.g. ------)
    //                                 block2
    for (y = 0; y < 8; y++, dst += 8 * ls >> ss_v) {
        uint8_t *ptr = dst, *l = lvl, *vmask = mask[y];
        unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];

        for (x = 1; vm & ~(x - 1); x <<= (2 << ss_h), ptr += 16 * bytesperpixel, l += 2 << ss_h) {
            if (row || y) { // never filter the top frame border
                if (vm & x) {
                    int L = *l, H = L >> 4;
                    int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];

                    if (vmask[0] & x) {
                        if (vmask[0] & (x << (1 + ss_h))) {
                            // two horizontally adjacent 16-wide edges
                            av_assert2(l[1 + ss_h] == L);
                            s->dsp.loop_filter_16[1](ptr, ls, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[2][1](ptr, ls, E, I, H);
                        }
                    } else if (vm & (x << (1 + ss_h))) {
                        // pack the second edge's level into the high byte
                        L = l[1 + ss_h];
                        H |= (L >> 4) << 8;
                        E |= s->filter_lut.mblim_lut[L] << 8;
                        I |= s->filter_lut.lim_lut[L] << 8;
                        s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
                                               [!!(vmask[1] & (x << (1 + ss_h)))]
                                               [1](ptr, ls, E, I, H);
                    } else {
                        s->dsp.loop_filter_8[!!(vmask[1] & x)]
                                            [1](ptr, ls, E, I, H);
                    }
                } else if (vm & (x << (1 + ss_h))) {
                    int L = l[1 + ss_h], H = L >> 4;
                    int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];

                    s->dsp.loop_filter_8[!!(vmask[1] & (x << (1 + ss_h)))]
                                        [1](ptr + 8 * bytesperpixel, ls, E, I, H);
                }
            }
            if (!ss_v) {
                // non-subsampled plane: also handle interior 4px edges
                if (vm3 & x) {
                    int L = *l, H = L >> 4;
                    int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];

                    if (vm3 & (x << (1 + ss_h))) {
                        L = l[1 + ss_h];
                        H |= (L >> 4) << 8;
                        E |= s->filter_lut.mblim_lut[L] << 8;
                        I |= s->filter_lut.lim_lut[L] << 8;
                        s->dsp.loop_filter_mix2[0][0][1](ptr + ls * 4, ls, E, I, H);
                    } else {
                        s->dsp.loop_filter_8[0][1](ptr + ls * 4, ls, E, I, H);
                    }
                } else if (vm3 & (x << (1 + ss_h))) {
                    int L = l[1 + ss_h], H = L >> 4;
                    int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];

                    s->dsp.loop_filter_8[0][1](ptr + ls * 4 + 8 * bytesperpixel, ls, E, I, H);
                }
            }
        }
        if (ss_v) {
            if (y & 1)
                lvl += 16;
        } else {
            lvl += 8;
        }
    }
}
+
/**
 * Loop-filter one 64x64 superblock: columns then rows on luma, then the
 * same on both chroma planes using the subsampled mask set.
 */
static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
                          int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
{
    VP9Context *s = ctx->priv_data;
    AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
    uint8_t *dst = f->data[0] + yoff;
    ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
    // mask[1] is the subsampled (chroma) set when either dimension is
    // subsampled, otherwise chroma shares the luma masks
    uint8_t (*uv_masks)[8][4] = lflvl->mask[s->ss_h | s->ss_v];
    int p;

    // FIXME in how far can we interleave the v/h loopfilter calls? E.g.
    // if you think of them as acting on a 8x8 block max, we can interleave
    // each v/h within the single x loop, but that only works if we work on
    // 8 pixel blocks, and we won't always do that (we want at least 16px
    // to use SSE2 optimizations, perhaps 32 for AVX2)

    filter_plane_cols(s, col, 0, 0, lflvl->level, lflvl->mask[0][0], dst, ls_y);
    filter_plane_rows(s, row, 0, 0, lflvl->level, lflvl->mask[0][1], dst, ls_y);

    for (p = 0; p < 2; p++) {
        dst = f->data[1 + p] + uvoff;
        filter_plane_cols(s, col, s->ss_h, s->ss_v, lflvl->level, uv_masks[0], dst, ls_uv);
        filter_plane_rows(s, row, s->ss_h, s->ss_v, lflvl->level, uv_masks[1], dst, ls_uv);
    }
}
+
/**
 * Compute the pixel range covered by tile number idx.
 *
 * The frame's n superblock units are divided among 1 << log2_n tiles;
 * the resulting superblock bounds are clamped to n and converted to
 * pixel coordinates (x8 -> same units as s->cols/s->rows * 8).
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int first_sb = (idx * n) >> log2_n;
    int last_sb = ((idx + 1) * n) >> log2_n;

    if (first_sb > n)
        first_sb = n;
    if (last_sb > n)
        last_sb = n;
    *start = first_sb << 3;
    *end = last_sb << 3;
}
+
+static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
+ int max_count, int update_factor)
+{
+ unsigned ct = ct0 + ct1, p2, p1;
+
+ if (!ct)
+ return;
+
+ update_factor = FASTDIV(update_factor * FFMIN(ct, max_count), max_count);
+ p1 = *p;
+ p2 = ((((int64_t) ct0) << 8) + (ct >> 1)) / ct;
+ p2 = av_clip(p2, 1, 255);
+
+ // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
+ *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
+}
+
+static void adapt_probs(VP9Context *s)
+{
+ int i, j, k, l, m;
+ prob_context *p = &s->prob_ctx[s->s.h.framectxid].p;
+ int uf = (s->s.h.keyframe || s->s.h.intraonly || !s->last_keyframe) ? 112 : 128;
+
+ // coefficients
+ for (i = 0; i < 4; i++)
+ for (j = 0; j < 2; j++)
+ for (k = 0; k < 2; k++)
+ for (l = 0; l < 6; l++)
+ for (m = 0; m < 6; m++) {
+ uint8_t *pp = s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m];
+ unsigned *e = s->counts.eob[i][j][k][l][m];
+ unsigned *c = s->counts.coef[i][j][k][l][m];
+
+ if (l == 0 && m >= 3) // dc only has 3 pt
+ break;
+
+ adapt_prob(&pp[0], e[0], e[1], 24, uf);
+ adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
+ adapt_prob(&pp[2], c[1], c[2], 24, uf);
+ }
+
+ if (s->s.h.keyframe || s->s.h.intraonly) {
+ memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
+ memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
+ memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
+ memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
+ return;
+ }
+
+ // skip flag
+ for (i = 0; i < 3; i++)
+ adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
+
+ // intra/inter flag
+ for (i = 0; i < 4; i++)
+ adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
+
+ // comppred flag
+ if (s->s.h.comppredmode == PRED_SWITCHABLE) {
+ for (i = 0; i < 5; i++)
+ adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
+ }
+
+ // reference frames
+ if (s->s.h.comppredmode != PRED_SINGLEREF) {
+ for (i = 0; i < 5; i++)
+ adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
+ s->counts.comp_ref[i][1], 20, 128);
+ }
+
+ if (s->s.h.comppredmode != PRED_COMPREF) {
+ for (i = 0; i < 5; i++) {
+ uint8_t *pp = p->single_ref[i];
+ unsigned (*c)[2] = s->counts.single_ref[i];
+
+ adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
+ adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
+ }
+ }
+
+ // block partitioning
+ for (i = 0; i < 4; i++)
+ for (j = 0; j < 4; j++) {
+ uint8_t *pp = p->partition[i][j];
+ unsigned *c = s->counts.partition[i][j];
+
+ adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
+ adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
+ adapt_prob(&pp[2], c[2], c[3], 20, 128);
+ }
+
+ // tx size
+ if (s->s.h.txfmmode == TX_SWITCHABLE) {
+ for (i = 0; i < 2; i++) {
+ unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
+
+ adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
+ adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
+ adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
+ adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
+ adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
+ adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
+ }
+ }
+
+ // interpolation filter
+ if (s->s.h.filtermode == FILTER_SWITCHABLE) {
+ for (i = 0; i < 4; i++) {
+ uint8_t *pp = p->filter[i];
+ unsigned *c = s->counts.filter[i];
+
+ adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
+ adapt_prob(&pp[1], c[1], c[2], 20, 128);
+ }
+ }
+
+ // inter modes
+ for (i = 0; i < 7; i++) {
+ uint8_t *pp = p->mv_mode[i];
+ unsigned *c = s->counts.mv_mode[i];
+
+ adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
+ adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
+ adapt_prob(&pp[2], c[1], c[3], 20, 128);
+ }
+
+ // mv joints
+ {
+ uint8_t *pp = p->mv_joint;
+ unsigned *c = s->counts.mv_joint;
+
+ adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
+ adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
+ adapt_prob(&pp[2], c[2], c[3], 20, 128);
+ }
+
+ // mv components
+ for (i = 0; i < 2; i++) {
+ uint8_t *pp;
+ unsigned *c, (*c2)[2], sum;
+
+ adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
+ s->counts.mv_comp[i].sign[1], 20, 128);
+
+ pp = p->mv_comp[i].classes;
+ c = s->counts.mv_comp[i].classes;
+ sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
+ adapt_prob(&pp[0], c[0], sum, 20, 128);
+ sum -= c[1];
+ adapt_prob(&pp[1], c[1], sum, 20, 128);
+ sum -= c[2] + c[3];
+ adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
+ adapt_prob(&pp[3], c[2], c[3], 20, 128);
+ sum -= c[4] + c[5];
+ adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
+ adapt_prob(&pp[5], c[4], c[5], 20, 128);
+ sum -= c[6];
+ adapt_prob(&pp[6], c[6], sum, 20, 128);
+ adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
+ adapt_prob(&pp[8], c[7], c[8], 20, 128);
+ adapt_prob(&pp[9], c[9], c[10], 20, 128);
+
+ adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
+ s->counts.mv_comp[i].class0[1], 20, 128);
+ pp = p->mv_comp[i].bits;
+ c2 = s->counts.mv_comp[i].bits;
+ for (j = 0; j < 10; j++)
+ adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
+
+ for (j = 0; j < 2; j++) {
+ pp = p->mv_comp[i].class0_fp[j];
+ c = s->counts.mv_comp[i].class0_fp[j];
+ adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
+ adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
+ adapt_prob(&pp[2], c[2], c[3], 20, 128);
+ }
+ pp = p->mv_comp[i].fp;
+ c = s->counts.mv_comp[i].fp;
+ adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
+ adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
+ adapt_prob(&pp[2], c[2], c[3], 20, 128);
+
+ if (s->s.h.highprecisionmvs) {
+ adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
+ s->counts.mv_comp[i].class0_hp[1], 20, 128);
+ adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
+ s->counts.mv_comp[i].hp[1], 20, 128);
+ }
+ }
+
+ // y intra modes
+ for (i = 0; i < 4; i++) {
+ uint8_t *pp = p->y_mode[i];
+ unsigned *c = s->counts.y_mode[i], sum, s2;
+
+ sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
+ adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
+ sum -= c[TM_VP8_PRED];
+ adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
+ sum -= c[VERT_PRED];
+ adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
+ s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
+ sum -= s2;
+ adapt_prob(&pp[3], s2, sum, 20, 128);
+ s2 -= c[HOR_PRED];
+ adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
+ adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
+ sum -= c[DIAG_DOWN_LEFT_PRED];
+ adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
+ sum -= c[VERT_LEFT_PRED];
+ adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
+ adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
+ }
+
+ // uv intra modes
+ for (i = 0; i < 10; i++) {
+ uint8_t *pp = p->uv_mode[i];
+ unsigned *c = s->counts.uv_mode[i], sum, s2;
+
+ sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
+ adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
+ sum -= c[TM_VP8_PRED];
+ adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
+ sum -= c[VERT_PRED];
+ adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
+ s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
+ sum -= s2;
+ adapt_prob(&pp[3], s2, sum, 20, 128);
+ s2 -= c[HOR_PRED];
+ adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
+ adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
+ sum -= c[DIAG_DOWN_LEFT_PRED];
+ adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
+ sum -= c[VERT_LEFT_PRED];
+ adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
+ adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
+ }
+}
+
+static void free_buffers(VP9Context *s)
+{
+ av_freep(&s->intra_pred_data[0]);
+ av_freep(&s->b_base);
+ av_freep(&s->block_base);
+}
+
+static av_cold int vp9_decode_free(AVCodecContext *ctx)
+{
+ VP9Context *s = ctx->priv_data;
+ int i;
+
+ for (i = 0; i < 3; i++) {
+ if (s->s.frames[i].tf.f->buf[0])
+ vp9_unref_frame(ctx, &s->s.frames[i]);
+ av_frame_free(&s->s.frames[i].tf.f);
+ }
+ for (i = 0; i < 8; i++) {
+ if (s->s.refs[i].f->buf[0])
+ ff_thread_release_buffer(ctx, &s->s.refs[i]);
+ av_frame_free(&s->s.refs[i].f);
+ if (s->next_refs[i].f->buf[0])
+ ff_thread_release_buffer(ctx, &s->next_refs[i]);
+ av_frame_free(&s->next_refs[i].f);
+ }
+ free_buffers(s);
+ av_freep(&s->c_b);
+ s->c_b_size = 0;
+
+ return 0;
+}
+
+
+static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
+ int *got_frame, AVPacket *pkt)
+{
+ const uint8_t *data = pkt->data;
+ int size = pkt->size;
+ VP9Context *s = ctx->priv_data;
+ int res, tile_row, tile_col, i, ref, row, col;
+ int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
+ (!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
+ ptrdiff_t yoff, uvoff, ls_y, ls_uv;
+ AVFrame *f;
+ int bytesperpixel;
+
+ if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
+ return res;
+ } else if (res == 0) {
+ if (!s->s.refs[ref].f->buf[0]) {
+ av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
+ return AVERROR_INVALIDDATA;
+ }
+ if ((res = av_frame_ref(frame, s->s.refs[ref].f)) < 0)
+ return res;
+ ((AVFrame *)frame)->pkt_pts = pkt->pts;
+ ((AVFrame *)frame)->pkt_dts = pkt->dts;
+ for (i = 0; i < 8; i++) {
+ if (s->next_refs[i].f->buf[0])
+ ff_thread_release_buffer(ctx, &s->next_refs[i]);
+ if (s->s.refs[i].f->buf[0] &&
+ (res = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i])) < 0)
+ return res;
+ }
+ *got_frame = 1;
+ return pkt->size;
+ }
+ data += res;
+ size -= res;
+
+ if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly) {
+ if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0])
+ vp9_unref_frame(ctx, &s->s.frames[REF_FRAME_SEGMAP]);
+ if (!s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
+ (res = vp9_ref_frame(ctx, &s->s.frames[REF_FRAME_SEGMAP], &s->s.frames[CUR_FRAME])) < 0)
+ return res;
+ }
+ if (s->s.frames[REF_FRAME_MVPAIR].tf.f->buf[0])
+ vp9_unref_frame(ctx, &s->s.frames[REF_FRAME_MVPAIR]);
+ if (!s->s.h.intraonly && !s->s.h.keyframe && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
+ (res = vp9_ref_frame(ctx, &s->s.frames[REF_FRAME_MVPAIR], &s->s.frames[CUR_FRAME])) < 0)
+ return res;
+ if (s->s.frames[CUR_FRAME].tf.f->buf[0])
+ vp9_unref_frame(ctx, &s->s.frames[CUR_FRAME]);
+ if ((res = vp9_alloc_frame(ctx, &s->s.frames[CUR_FRAME])) < 0)
+ return res;
+ f = s->s.frames[CUR_FRAME].tf.f;
+ f->key_frame = s->s.h.keyframe;
+ f->pict_type = (s->s.h.keyframe || s->s.h.intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
+ ls_y = f->linesize[0];
+ ls_uv =f->linesize[1];
+
+ if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0] &&
+ (s->s.frames[REF_FRAME_MVPAIR].tf.f->width != s->s.frames[CUR_FRAME].tf.f->width ||
+ s->s.frames[REF_FRAME_MVPAIR].tf.f->height != s->s.frames[CUR_FRAME].tf.f->height)) {
+ vp9_unref_frame(ctx, &s->s.frames[REF_FRAME_SEGMAP]);
+ }
+
+ // ref frame setup
+ for (i = 0; i < 8; i++) {
+ if (s->next_refs[i].f->buf[0])
+ ff_thread_release_buffer(ctx, &s->next_refs[i]);
+ if (s->s.h.refreshrefmask & (1 << i)) {
+ res = ff_thread_ref_frame(&s->next_refs[i], &s->s.frames[CUR_FRAME].tf);
+ } else if (s->s.refs[i].f->buf[0]) {
+ res = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i]);
+ }
+ if (res < 0)
+ return res;
+ }
+
+ if (ctx->hwaccel) {
+ res = ctx->hwaccel->start_frame(ctx, NULL, 0);
+ if (res < 0)
+ return res;
+ res = ctx->hwaccel->decode_slice(ctx, pkt->data, pkt->size);
+ if (res < 0)
+ return res;
+ res = ctx->hwaccel->end_frame(ctx);
+ if (res < 0)
+ return res;
+ goto finish;
+ }
+
+ // main tile decode loop
+ bytesperpixel = s->bytesperpixel;
+ memset(s->above_partition_ctx, 0, s->cols);
+ memset(s->above_skip_ctx, 0, s->cols);
+ if (s->s.h.keyframe || s->s.h.intraonly) {
+ memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
+ } else {
+ memset(s->above_mode_ctx, NEARESTMV, s->cols);
+ }
+ memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
+ memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
+ memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
+ memset(s->above_segpred_ctx, 0, s->cols);
+ s->pass = s->s.frames[CUR_FRAME].uses_2pass =
+ ctx->active_thread_type == FF_THREAD_FRAME && s->s.h.refreshctx && !s->s.h.parallelmode;
+ if ((res = update_block_buffers(ctx)) < 0) {
+ av_log(ctx, AV_LOG_ERROR,
+ "Failed to allocate block buffers\n");
+ return res;
+ }
+ if (s->s.h.refreshctx && s->s.h.parallelmode) {
+ int j, k, l, m;
+
+ for (i = 0; i < 4; i++) {
+ for (j = 0; j < 2; j++)
+ for (k = 0; k < 2; k++)
+ for (l = 0; l < 6; l++)
+ for (m = 0; m < 6; m++)
+ memcpy(s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m],
+ s->prob.coef[i][j][k][l][m], 3);
+ if (s->s.h.txfmmode == i)
+ break;
+ }
+ s->prob_ctx[s->s.h.framectxid].p = s->prob.p;
+ ff_thread_finish_setup(ctx);
+ } else if (!s->s.h.refreshctx) {
+ ff_thread_finish_setup(ctx);
+ }
+
+ do {
+ yoff = uvoff = 0;
+ s->b = s->b_base;
+ s->block = s->block_base;
+ s->uvblock[0] = s->uvblock_base[0];
+ s->uvblock[1] = s->uvblock_base[1];
+ s->eob = s->eob_base;
+ s->uveob[0] = s->uveob_base[0];
+ s->uveob[1] = s->uveob_base[1];
+
+ for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
+ set_tile_offset(&s->tile_row_start, &s->tile_row_end,
+ tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
+ if (s->pass != 2) {
+ for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
+ int64_t tile_size;
+
+ if (tile_col == s->s.h.tiling.tile_cols - 1 &&
+ tile_row == s->s.h.tiling.tile_rows - 1) {
+ tile_size = size;
+ } else {
+ tile_size = AV_RB32(data);
+ data += 4;
+ size -= 4;
+ }
+ if (tile_size > size) {
+ ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
+ return AVERROR_INVALIDDATA;
+ }
+ ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
+ if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
+ ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
+ return AVERROR_INVALIDDATA;
+ }
+ data += tile_size;
+ size -= tile_size;
+ }
+ }
+
+ for (row = s->tile_row_start; row < s->tile_row_end;
+ row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
+ struct VP9Filter *lflvl_ptr = s->lflvl;
+ ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
+
+ for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
+ set_tile_offset(&s->tile_col_start, &s->tile_col_end,
+ tile_col, s->s.h.tiling.log2_tile_cols, s->sb_cols);
+
+ if (s->pass != 2) {
+ memset(s->left_partition_ctx, 0, 8);
+ memset(s->left_skip_ctx, 0, 8);
+ if (s->s.h.keyframe || s->s.h.intraonly) {
+ memset(s->left_mode_ctx, DC_PRED, 16);
+ } else {
+ memset(s->left_mode_ctx, NEARESTMV, 8);
+ }
+ memset(s->left_y_nnz_ctx, 0, 16);
+ memset(s->left_uv_nnz_ctx, 0, 32);
+ memset(s->left_segpred_ctx, 0, 8);
+
+ memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
+ }
+
+ for (col = s->tile_col_start;
+ col < s->tile_col_end;
+ col += 8, yoff2 += 64 * bytesperpixel,
+ uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
+ // FIXME integrate with lf code (i.e. zero after each
+ // use, similar to invtxfm coefficients, or similar)
+ if (s->pass != 1) {
+ memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
+ }
+
+ if (s->pass == 2) {
+ decode_sb_mem(ctx, row, col, lflvl_ptr,
+ yoff2, uvoff2, BL_64X64);
+ } else {
+ decode_sb(ctx, row, col, lflvl_ptr,
+ yoff2, uvoff2, BL_64X64);
+ }
+ }
+ if (s->pass != 2) {
+ memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
+ }
+ }
+
+ if (s->pass == 1) {
+ continue;
+ }
+
+ // backup pre-loopfilter reconstruction data for intra
+ // prediction of next row of sb64s
+ if (row + 8 < s->rows) {
+ memcpy(s->intra_pred_data[0],
+ f->data[0] + yoff + 63 * ls_y,
+ 8 * s->cols * bytesperpixel);
+ memcpy(s->intra_pred_data[1],
+ f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
+ 8 * s->cols * bytesperpixel >> s->ss_h);
+ memcpy(s->intra_pred_data[2],
+ f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
+ 8 * s->cols * bytesperpixel >> s->ss_h);
+ }
+
+ // loopfilter one row
+ if (s->s.h.filter.level) {
+ yoff2 = yoff;
+ uvoff2 = uvoff;
+ lflvl_ptr = s->lflvl;
+ for (col = 0; col < s->cols;
+ col += 8, yoff2 += 64 * bytesperpixel,
+ uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
+ loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
+ }
+ }
+
+ // FIXME maybe we can make this more finegrained by running the
+ // loopfilter per-block instead of after each sbrow
+ // In fact that would also make intra pred left preparation easier?
+ ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, row >> 3, 0);
+ }
+ }
+
+ if (s->pass < 2 && s->s.h.refreshctx && !s->s.h.parallelmode) {
+ adapt_probs(s);
+ ff_thread_finish_setup(ctx);
+ }
+ } while (s->pass++ == 1);
+ ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
+
+finish:
+ // ref frame setup
+ for (i = 0; i < 8; i++) {
+ if (s->s.refs[i].f->buf[0])
+ ff_thread_release_buffer(ctx, &s->s.refs[i]);
+ if (s->next_refs[i].f->buf[0] &&
+ (res = ff_thread_ref_frame(&s->s.refs[i], &s->next_refs[i])) < 0)
+ return res;
+ }
+
+ if (!s->s.h.invisible) {
+ if ((res = av_frame_ref(frame, s->s.frames[CUR_FRAME].tf.f)) < 0)
+ return res;
+ *got_frame = 1;
+ }
+
+ return pkt->size;
+}
+
+static void vp9_decode_flush(AVCodecContext *ctx)
+{
+ VP9Context *s = ctx->priv_data;
+ int i;
+
+ for (i = 0; i < 3; i++)
+ vp9_unref_frame(ctx, &s->s.frames[i]);
+ for (i = 0; i < 8; i++)
+ ff_thread_release_buffer(ctx, &s->s.refs[i]);
+}
+
+static int init_frames(AVCodecContext *ctx)
+{
+ VP9Context *s = ctx->priv_data;
+ int i;
+
+ for (i = 0; i < 3; i++) {
+ s->s.frames[i].tf.f = av_frame_alloc();
+ if (!s->s.frames[i].tf.f) {
+ vp9_decode_free(ctx);
+ av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
+ return AVERROR(ENOMEM);
+ }
+ }
+ for (i = 0; i < 8; i++) {
+ s->s.refs[i].f = av_frame_alloc();
+ s->next_refs[i].f = av_frame_alloc();
+ if (!s->s.refs[i].f || !s->next_refs[i].f) {
+ vp9_decode_free(ctx);
+ av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
+ return AVERROR(ENOMEM);
+ }
+ }
+
+ return 0;
+}
+
+static av_cold int vp9_decode_init(AVCodecContext *ctx)
+{
+ VP9Context *s = ctx->priv_data;
+
+ ctx->internal->allocate_progress = 1;
+ s->last_bpp = 0;
+ s->s.h.filter.sharpness = -1;
+
+ return init_frames(ctx);
+}
+
+#if HAVE_THREADS
+static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
+{
+ return init_frames(avctx);
+}
+
+static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
+{
+ int i, res;
+ VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
+
+ for (i = 0; i < 3; i++) {
+ if (s->s.frames[i].tf.f->buf[0])
+ vp9_unref_frame(dst, &s->s.frames[i]);
+ if (ssrc->s.frames[i].tf.f->buf[0]) {
+ if ((res = vp9_ref_frame(dst, &s->s.frames[i], &ssrc->s.frames[i])) < 0)
+ return res;
+ }
+ }
+ for (i = 0; i < 8; i++) {
+ if (s->s.refs[i].f->buf[0])
+ ff_thread_release_buffer(dst, &s->s.refs[i]);
+ if (ssrc->next_refs[i].f->buf[0]) {
+ if ((res = ff_thread_ref_frame(&s->s.refs[i], &ssrc->next_refs[i])) < 0)
+ return res;
+ }
+ }
+
+ s->s.h.invisible = ssrc->s.h.invisible;
+ s->s.h.keyframe = ssrc->s.h.keyframe;
+ s->s.h.intraonly = ssrc->s.h.intraonly;
+ s->ss_v = ssrc->ss_v;
+ s->ss_h = ssrc->ss_h;
+ s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
+ s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
+ s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
+ s->bytesperpixel = ssrc->bytesperpixel;
+ s->gf_fmt = ssrc->gf_fmt;
+ s->w = ssrc->w;
+ s->h = ssrc->h;
+ s->bpp = ssrc->bpp;
+ s->bpp_index = ssrc->bpp_index;
+ s->pix_fmt = ssrc->pix_fmt;
+ memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
+ memcpy(&s->s.h.lf_delta, &ssrc->s.h.lf_delta, sizeof(s->s.h.lf_delta));
+ memcpy(&s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
+ sizeof(s->s.h.segmentation.feat));
+
+ return 0;
+}
+#endif
+
+AVCodec ff_vp9_decoder = {
+ .name = "vp9",
+ .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
+ .type = AVMEDIA_TYPE_VIDEO,
+ .id = AV_CODEC_ID_VP9,
+ .priv_data_size = sizeof(VP9Context),
+ .init = vp9_decode_init,
+ .close = vp9_decode_free,
+ .decode = vp9_decode_frame,
+ .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
+ .flush = vp9_decode_flush,
+ .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
+ .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
+ .profiles = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
+};
diff --git a/media/ffvpx/libavcodec/vp9.h b/media/ffvpx/libavcodec/vp9.h
new file mode 100644
index 000000000..df5bd4d85
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp9.h
@@ -0,0 +1,211 @@
+/*
+ * VP9 compatible video decoder
+ *
+ * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+ * Copyright (C) 2013 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VP9_H
+#define AVCODEC_VP9_H
+
+#include <stdint.h>
+
+#include "thread.h"
+#include "vp56.h"
+
+enum BlockLevel {
+ BL_64X64,
+ BL_32X32,
+ BL_16X16,
+ BL_8X8,
+};
+
+enum BlockPartition {
+ PARTITION_NONE, // [ ] <-.
+ PARTITION_H, // [-] |
+ PARTITION_V, // [|] |
+ PARTITION_SPLIT, // [+] --'
+};
+
+enum BlockSize {
+ BS_64x64,
+ BS_64x32,
+ BS_32x64,
+ BS_32x32,
+ BS_32x16,
+ BS_16x32,
+ BS_16x16,
+ BS_16x8,
+ BS_8x16,
+ BS_8x8,
+ BS_8x4,
+ BS_4x8,
+ BS_4x4,
+ N_BS_SIZES,
+};
+
+enum TxfmMode {
+ TX_4X4,
+ TX_8X8,
+ TX_16X16,
+ TX_32X32,
+ N_TXFM_SIZES,
+ TX_SWITCHABLE = N_TXFM_SIZES,
+ N_TXFM_MODES
+};
+
+enum TxfmType {
+ DCT_DCT,
+ DCT_ADST,
+ ADST_DCT,
+ ADST_ADST,
+ N_TXFM_TYPES
+};
+
+enum IntraPredMode {
+ VERT_PRED,
+ HOR_PRED,
+ DC_PRED,
+ DIAG_DOWN_LEFT_PRED,
+ DIAG_DOWN_RIGHT_PRED,
+ VERT_RIGHT_PRED,
+ HOR_DOWN_PRED,
+ VERT_LEFT_PRED,
+ HOR_UP_PRED,
+ TM_VP8_PRED,
+ LEFT_DC_PRED,
+ TOP_DC_PRED,
+ DC_128_PRED,
+ DC_127_PRED,
+ DC_129_PRED,
+ N_INTRA_PRED_MODES
+};
+
+enum InterPredMode {
+ NEARESTMV = 10,
+ NEARMV = 11,
+ ZEROMV = 12,
+ NEWMV = 13,
+};
+
+enum FilterMode {
+ FILTER_8TAP_SMOOTH,
+ FILTER_8TAP_REGULAR,
+ FILTER_8TAP_SHARP,
+ FILTER_BILINEAR,
+ FILTER_SWITCHABLE,
+};
+
+enum CompPredMode {
+ PRED_SINGLEREF,
+ PRED_COMPREF,
+ PRED_SWITCHABLE,
+};
+
+struct VP9mvrefPair {
+ VP56mv mv[2];
+ int8_t ref[2];
+};
+
+typedef struct VP9Frame {
+ ThreadFrame tf;
+ AVBufferRef *extradata;
+ uint8_t *segmentation_map;
+ struct VP9mvrefPair *mv;
+ int uses_2pass;
+
+ AVBufferRef *hwaccel_priv_buf;
+ void *hwaccel_picture_private;
+} VP9Frame;
+
+typedef struct VP9BitstreamHeader {
+ // bitstream header
+ uint8_t profile;
+ uint8_t keyframe;
+ uint8_t invisible;
+ uint8_t errorres;
+ uint8_t intraonly;
+ uint8_t resetctx;
+ uint8_t refreshrefmask;
+ uint8_t highprecisionmvs;
+ enum FilterMode filtermode;
+ uint8_t allowcompinter;
+ uint8_t refreshctx;
+ uint8_t parallelmode;
+ uint8_t framectxid;
+ uint8_t use_last_frame_mvs;
+ uint8_t refidx[3];
+ uint8_t signbias[3];
+ uint8_t fixcompref;
+ uint8_t varcompref[2];
+ struct {
+ uint8_t level;
+ int8_t sharpness;
+ } filter;
+ struct {
+ uint8_t enabled;
+ uint8_t updated;
+ int8_t mode[2];
+ int8_t ref[4];
+ } lf_delta;
+ uint8_t yac_qi;
+ int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
+ uint8_t lossless;
+#define MAX_SEGMENT 8
+ struct {
+ uint8_t enabled;
+ uint8_t temporal;
+ uint8_t absolute_vals;
+ uint8_t update_map;
+ uint8_t prob[7];
+ uint8_t pred_prob[3];
+ struct {
+ uint8_t q_enabled;
+ uint8_t lf_enabled;
+ uint8_t ref_enabled;
+ uint8_t skip_enabled;
+ uint8_t ref_val;
+ int16_t q_val;
+ int8_t lf_val;
+ int16_t qmul[2][2];
+ uint8_t lflvl[4][2];
+ } feat[MAX_SEGMENT];
+ } segmentation;
+ enum TxfmMode txfmmode;
+ enum CompPredMode comppredmode;
+ struct {
+ unsigned log2_tile_cols, log2_tile_rows;
+ unsigned tile_cols, tile_rows;
+ } tiling;
+
+ int uncompressed_header_size;
+ int compressed_header_size;
+} VP9BitstreamHeader;
+
+typedef struct VP9SharedContext {
+ VP9BitstreamHeader h;
+
+ ThreadFrame refs[8];
+#define CUR_FRAME 0
+#define REF_FRAME_MVPAIR 1
+#define REF_FRAME_SEGMAP 2
+ VP9Frame frames[3];
+} VP9SharedContext;
+
+#endif /* AVCODEC_VP9_H */
diff --git a/media/ffvpx/libavcodec/vp9_mc_template.c b/media/ffvpx/libavcodec/vp9_mc_template.c
new file mode 100644
index 000000000..38d9a6da9
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp9_mc_template.c
@@ -0,0 +1,435 @@
+/*
+ * VP9 compatible video decoder
+ *
+ * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+ * Copyright (C) 2013 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define ROUNDED_DIV_MVx2(a, b) \
+ (VP56mv) { .x = ROUNDED_DIV(a.x + b.x, 2), .y = ROUNDED_DIV(a.y + b.y, 2) }
+#define ROUNDED_DIV_MVx4(a, b, c, d) \
+ (VP56mv) { .x = ROUNDED_DIV(a.x + b.x + c.x + d.x, 4), \
+ .y = ROUNDED_DIV(a.y + b.y + c.y + d.y, 4) }
+
+static void FN(inter_pred)(AVCodecContext *ctx)
+{
+ static const uint8_t bwlog_tab[2][N_BS_SIZES] = {
+ { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
+ { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 },
+ };
+ VP9Context *s = ctx->priv_data;
+ VP9Block *b = s->b;
+ int row = s->row, col = s->col;
+ ThreadFrame *tref1 = &s->s.refs[s->s.h.refidx[b->ref[0]]], *tref2;
+ AVFrame *ref1 = tref1->f, *ref2;
+ int w1 = ref1->width, h1 = ref1->height, w2, h2;
+ ptrdiff_t ls_y = s->y_stride, ls_uv = s->uv_stride;
+ int bytesperpixel = BYTES_PER_PIXEL;
+
+ if (b->comp) {
+ tref2 = &s->s.refs[s->s.h.refidx[b->ref[1]]];
+ ref2 = tref2->f;
+ w2 = ref2->width;
+ h2 = ref2->height;
+ }
+
+ // y inter pred
+ if (b->bs > BS_8x8) {
+ VP56mv uvmv;
+
+#if SCALED == 0
+ if (b->bs == BS_8x4) {
+ mc_luma_dir(s, mc[3][b->filter][0], s->dst[0], ls_y,
+ ref1->data[0], ref1->linesize[0], tref1,
+ row << 3, col << 3, &b->mv[0][0],,,,, 8, 4, w1, h1, 0);
+ mc_luma_dir(s, mc[3][b->filter][0],
+ s->dst[0] + 4 * ls_y, ls_y,
+ ref1->data[0], ref1->linesize[0], tref1,
+ (row << 3) + 4, col << 3, &b->mv[2][0],,,,, 8, 4, w1, h1, 0);
+ w1 = (w1 + s->ss_h) >> s->ss_h;
+ if (s->ss_v) {
+ h1 = (h1 + 1) >> 1;
+ uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[2][0]);
+ mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][0],
+ s->dst[1], s->dst[2], ls_uv,
+ ref1->data[1], ref1->linesize[1],
+ ref1->data[2], ref1->linesize[2], tref1,
+ row << 2, col << (3 - s->ss_h),
+ &uvmv,,,,, 8 >> s->ss_h, 4, w1, h1, 0);
+ } else {
+ mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][0],
+ s->dst[1], s->dst[2], ls_uv,
+ ref1->data[1], ref1->linesize[1],
+ ref1->data[2], ref1->linesize[2], tref1,
+ row << 3, col << (3 - s->ss_h),
+ &b->mv[0][0],,,,, 8 >> s->ss_h, 4, w1, h1, 0);
+ // BUG for 4:2:2 bs=8x4, libvpx uses the wrong block index
+ // to get the motion vector for the bottom 4x4 block
+ // https://code.google.com/p/webm/issues/detail?id=993
+ if (s->ss_h == 0) {
+ uvmv = b->mv[2][0];
+ } else {
+ uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[2][0]);
+ }
+ mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][0],
+ s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv,
+ ref1->data[1], ref1->linesize[1],
+ ref1->data[2], ref1->linesize[2], tref1,
+ (row << 3) + 4, col << (3 - s->ss_h),
+ &uvmv,,,,, 8 >> s->ss_h, 4, w1, h1, 0);
+ }
+
+ if (b->comp) {
+ mc_luma_dir(s, mc[3][b->filter][1], s->dst[0], ls_y,
+ ref2->data[0], ref2->linesize[0], tref2,
+ row << 3, col << 3, &b->mv[0][1],,,,, 8, 4, w2, h2, 1);
+ mc_luma_dir(s, mc[3][b->filter][1],
+ s->dst[0] + 4 * ls_y, ls_y,
+ ref2->data[0], ref2->linesize[0], tref2,
+ (row << 3) + 4, col << 3, &b->mv[2][1],,,,, 8, 4, w2, h2, 1);
+ w2 = (w2 + s->ss_h) >> s->ss_h;
+ if (s->ss_v) {
+ h2 = (h2 + 1) >> 1;
+ uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[2][1]);
+ mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][1],
+ s->dst[1], s->dst[2], ls_uv,
+ ref2->data[1], ref2->linesize[1],
+ ref2->data[2], ref2->linesize[2], tref2,
+ row << 2, col << (3 - s->ss_h),
+ &uvmv,,,,, 8 >> s->ss_h, 4, w2, h2, 1);
+ } else {
+ mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][1],
+ s->dst[1], s->dst[2], ls_uv,
+ ref2->data[1], ref2->linesize[1],
+ ref2->data[2], ref2->linesize[2], tref2,
+ row << 3, col << (3 - s->ss_h),
+ &b->mv[0][1],,,,, 8 >> s->ss_h, 4, w2, h2, 1);
+ // BUG for 4:2:2 bs=8x4, libvpx uses the wrong block index
+ // to get the motion vector for the bottom 4x4 block
+ // https://code.google.com/p/webm/issues/detail?id=993
+ if (s->ss_h == 0) {
+ uvmv = b->mv[2][1];
+ } else {
+ uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[2][1]);
+ }
+ mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][1],
+ s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv,
+ ref2->data[1], ref2->linesize[1],
+ ref2->data[2], ref2->linesize[2], tref2,
+ (row << 3) + 4, col << (3 - s->ss_h),
+ &uvmv,,,,, 8 >> s->ss_h, 4, w2, h2, 1);
+ }
+ }
+ } else if (b->bs == BS_4x8) {
+ mc_luma_dir(s, mc[4][b->filter][0], s->dst[0], ls_y,
+ ref1->data[0], ref1->linesize[0], tref1,
+ row << 3, col << 3, &b->mv[0][0],,,,, 4, 8, w1, h1, 0);
+ mc_luma_dir(s, mc[4][b->filter][0], s->dst[0] + 4 * bytesperpixel, ls_y,
+ ref1->data[0], ref1->linesize[0], tref1,
+ row << 3, (col << 3) + 4, &b->mv[1][0],,,,, 4, 8, w1, h1, 0);
+ h1 = (h1 + s->ss_v) >> s->ss_v;
+ if (s->ss_h) {
+ w1 = (w1 + 1) >> 1;
+ uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[1][0]);
+ mc_chroma_dir(s, mc[4][b->filter][0],
+ s->dst[1], s->dst[2], ls_uv,
+ ref1->data[1], ref1->linesize[1],
+ ref1->data[2], ref1->linesize[2], tref1,
+ row << (3 - s->ss_v), col << 2,
+ &uvmv,,,,, 4, 8 >> s->ss_v, w1, h1, 0);
+ } else {
+ mc_chroma_dir(s, mc[4][b->filter][0],
+ s->dst[1], s->dst[2], ls_uv,
+ ref1->data[1], ref1->linesize[1],
+ ref1->data[2], ref1->linesize[2], tref1,
+ row << (3 - s->ss_v), col << 3,
+ &b->mv[0][0],,,,, 4, 8 >> s->ss_v, w1, h1, 0);
+ mc_chroma_dir(s, mc[4][b->filter][0],
+ s->dst[1] + 4 * bytesperpixel,
+ s->dst[2] + 4 * bytesperpixel, ls_uv,
+ ref1->data[1], ref1->linesize[1],
+ ref1->data[2], ref1->linesize[2], tref1,
+ row << (3 - s->ss_v), (col << 3) + 4,
+ &b->mv[1][0],,,,, 4, 8 >> s->ss_v, w1, h1, 0);
+ }
+
+ if (b->comp) {
+ mc_luma_dir(s, mc[4][b->filter][1], s->dst[0], ls_y,
+ ref2->data[0], ref2->linesize[0], tref2,
+ row << 3, col << 3, &b->mv[0][1],,,,, 4, 8, w2, h2, 1);
+ mc_luma_dir(s, mc[4][b->filter][1], s->dst[0] + 4 * bytesperpixel, ls_y,
+ ref2->data[0], ref2->linesize[0], tref2,
+ row << 3, (col << 3) + 4, &b->mv[1][1],,,,, 4, 8, w2, h2, 1);
+ h2 = (h2 + s->ss_v) >> s->ss_v;
+ if (s->ss_h) {
+ w2 = (w2 + 1) >> 1;
+ uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[1][1]);
+ mc_chroma_dir(s, mc[4][b->filter][1],
+ s->dst[1], s->dst[2], ls_uv,
+ ref2->data[1], ref2->linesize[1],
+ ref2->data[2], ref2->linesize[2], tref2,
+ row << (3 - s->ss_v), col << 2,
+ &uvmv,,,,, 4, 8 >> s->ss_v, w2, h2, 1);
+ } else {
+ mc_chroma_dir(s, mc[4][b->filter][1],
+ s->dst[1], s->dst[2], ls_uv,
+ ref2->data[1], ref2->linesize[1],
+ ref2->data[2], ref2->linesize[2], tref2,
+ row << (3 - s->ss_v), col << 3,
+ &b->mv[0][1],,,,, 4, 8 >> s->ss_v, w2, h2, 1);
+ mc_chroma_dir(s, mc[4][b->filter][1],
+ s->dst[1] + 4 * bytesperpixel,
+ s->dst[2] + 4 * bytesperpixel, ls_uv,
+ ref2->data[1], ref2->linesize[1],
+ ref2->data[2], ref2->linesize[2], tref2,
+ row << (3 - s->ss_v), (col << 3) + 4,
+ &b->mv[1][1],,,,, 4, 8 >> s->ss_v, w2, h2, 1);
+ }
+ }
+ } else
+#endif
+ {
+ av_assert2(b->bs == BS_4x4);
+
+ // FIXME if two horizontally adjacent blocks have the same MV,
+ // do a w8 instead of a w4 call
+ mc_luma_dir(s, mc[4][b->filter][0], s->dst[0], ls_y,
+ ref1->data[0], ref1->linesize[0], tref1,
+ row << 3, col << 3, &b->mv[0][0],
+ 0, 0, 8, 8, 4, 4, w1, h1, 0);
+ mc_luma_dir(s, mc[4][b->filter][0], s->dst[0] + 4 * bytesperpixel, ls_y,
+ ref1->data[0], ref1->linesize[0], tref1,
+ row << 3, (col << 3) + 4, &b->mv[1][0],
+ 4, 0, 8, 8, 4, 4, w1, h1, 0);
+ mc_luma_dir(s, mc[4][b->filter][0],
+ s->dst[0] + 4 * ls_y, ls_y,
+ ref1->data[0], ref1->linesize[0], tref1,
+ (row << 3) + 4, col << 3, &b->mv[2][0],
+ 0, 4, 8, 8, 4, 4, w1, h1, 0);
+ mc_luma_dir(s, mc[4][b->filter][0],
+ s->dst[0] + 4 * ls_y + 4 * bytesperpixel, ls_y,
+ ref1->data[0], ref1->linesize[0], tref1,
+ (row << 3) + 4, (col << 3) + 4, &b->mv[3][0],
+ 4, 4, 8, 8, 4, 4, w1, h1, 0);
+ if (s->ss_v) {
+ h1 = (h1 + 1) >> 1;
+ if (s->ss_h) {
+ w1 = (w1 + 1) >> 1;
+ uvmv = ROUNDED_DIV_MVx4(b->mv[0][0], b->mv[1][0],
+ b->mv[2][0], b->mv[3][0]);
+ mc_chroma_dir(s, mc[4][b->filter][0],
+ s->dst[1], s->dst[2], ls_uv,
+ ref1->data[1], ref1->linesize[1],
+ ref1->data[2], ref1->linesize[2], tref1,
+ row << 2, col << 2,
+ &uvmv, 0, 0, 4, 4, 4, 4, w1, h1, 0);
+ } else {
+ uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[2][0]);
+ mc_chroma_dir(s, mc[4][b->filter][0],
+ s->dst[1], s->dst[2], ls_uv,
+ ref1->data[1], ref1->linesize[1],
+ ref1->data[2], ref1->linesize[2], tref1,
+ row << 2, col << 3,
+ &uvmv, 0, 0, 8, 4, 4, 4, w1, h1, 0);
+ uvmv = ROUNDED_DIV_MVx2(b->mv[1][0], b->mv[3][0]);
+ mc_chroma_dir(s, mc[4][b->filter][0],
+ s->dst[1] + 4 * bytesperpixel,
+ s->dst[2] + 4 * bytesperpixel, ls_uv,
+ ref1->data[1], ref1->linesize[1],
+ ref1->data[2], ref1->linesize[2], tref1,
+ row << 2, (col << 3) + 4,
+ &uvmv, 4, 0, 8, 4, 4, 4, w1, h1, 0);
+ }
+ } else {
+ if (s->ss_h) {
+ w1 = (w1 + 1) >> 1;
+ uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[1][0]);
+ mc_chroma_dir(s, mc[4][b->filter][0],
+ s->dst[1], s->dst[2], ls_uv,
+ ref1->data[1], ref1->linesize[1],
+ ref1->data[2], ref1->linesize[2], tref1,
+ row << 3, col << 2,
+ &uvmv, 0, 0, 4, 8, 4, 4, w1, h1, 0);
+ // BUG libvpx uses wrong block index for 4:2:2 bs=4x4
+ // bottom block
+ // https://code.google.com/p/webm/issues/detail?id=993
+ uvmv = ROUNDED_DIV_MVx2(b->mv[1][0], b->mv[2][0]);
+ mc_chroma_dir(s, mc[4][b->filter][0],
+ s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv,
+ ref1->data[1], ref1->linesize[1],
+ ref1->data[2], ref1->linesize[2], tref1,
+ (row << 3) + 4, col << 2,
+ &uvmv, 0, 4, 4, 8, 4, 4, w1, h1, 0);
+ } else {
+ mc_chroma_dir(s, mc[4][b->filter][0],
+ s->dst[1], s->dst[2], ls_uv,
+ ref1->data[1], ref1->linesize[1],
+ ref1->data[2], ref1->linesize[2], tref1,
+ row << 3, col << 3,
+ &b->mv[0][0], 0, 0, 8, 8, 4, 4, w1, h1, 0);
+ mc_chroma_dir(s, mc[4][b->filter][0],
+ s->dst[1] + 4 * bytesperpixel,
+ s->dst[2] + 4 * bytesperpixel, ls_uv,
+ ref1->data[1], ref1->linesize[1],
+ ref1->data[2], ref1->linesize[2], tref1,
+ row << 3, (col << 3) + 4,
+ &b->mv[1][0], 4, 0, 8, 8, 4, 4, w1, h1, 0);
+ mc_chroma_dir(s, mc[4][b->filter][0],
+ s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv,
+ ref1->data[1], ref1->linesize[1],
+ ref1->data[2], ref1->linesize[2], tref1,
+ (row << 3) + 4, col << 3,
+ &b->mv[2][0], 0, 4, 8, 8, 4, 4, w1, h1, 0);
+ mc_chroma_dir(s, mc[4][b->filter][0],
+ s->dst[1] + 4 * ls_uv + 4 * bytesperpixel,
+ s->dst[2] + 4 * ls_uv + 4 * bytesperpixel, ls_uv,
+ ref1->data[1], ref1->linesize[1],
+ ref1->data[2], ref1->linesize[2], tref1,
+ (row << 3) + 4, (col << 3) + 4,
+ &b->mv[3][0], 4, 4, 8, 8, 4, 4, w1, h1, 0);
+ }
+ }
+
+ if (b->comp) {
+ mc_luma_dir(s, mc[4][b->filter][1], s->dst[0], ls_y,
+ ref2->data[0], ref2->linesize[0], tref2,
+ row << 3, col << 3, &b->mv[0][1], 0, 0, 8, 8, 4, 4, w2, h2, 1);
+ mc_luma_dir(s, mc[4][b->filter][1], s->dst[0] + 4 * bytesperpixel, ls_y,
+ ref2->data[0], ref2->linesize[0], tref2,
+ row << 3, (col << 3) + 4, &b->mv[1][1], 4, 0, 8, 8, 4, 4, w2, h2, 1);
+ mc_luma_dir(s, mc[4][b->filter][1],
+ s->dst[0] + 4 * ls_y, ls_y,
+ ref2->data[0], ref2->linesize[0], tref2,
+ (row << 3) + 4, col << 3, &b->mv[2][1], 0, 4, 8, 8, 4, 4, w2, h2, 1);
+ mc_luma_dir(s, mc[4][b->filter][1],
+ s->dst[0] + 4 * ls_y + 4 * bytesperpixel, ls_y,
+ ref2->data[0], ref2->linesize[0], tref2,
+ (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, 8, 8, 4, 4, w2, h2, 1);
+ if (s->ss_v) {
+ h2 = (h2 + 1) >> 1;
+ if (s->ss_h) {
+ w2 = (w2 + 1) >> 1;
+ uvmv = ROUNDED_DIV_MVx4(b->mv[0][1], b->mv[1][1],
+ b->mv[2][1], b->mv[3][1]);
+ mc_chroma_dir(s, mc[4][b->filter][1],
+ s->dst[1], s->dst[2], ls_uv,
+ ref2->data[1], ref2->linesize[1],
+ ref2->data[2], ref2->linesize[2], tref2,
+ row << 2, col << 2,
+ &uvmv, 0, 0, 4, 4, 4, 4, w2, h2, 1);
+ } else {
+ uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[2][1]);
+ mc_chroma_dir(s, mc[4][b->filter][1],
+ s->dst[1], s->dst[2], ls_uv,
+ ref2->data[1], ref2->linesize[1],
+ ref2->data[2], ref2->linesize[2], tref2,
+ row << 2, col << 3,
+ &uvmv, 0, 0, 8, 4, 4, 4, w2, h2, 1);
+ uvmv = ROUNDED_DIV_MVx2(b->mv[1][1], b->mv[3][1]);
+ mc_chroma_dir(s, mc[4][b->filter][1],
+ s->dst[1] + 4 * bytesperpixel,
+ s->dst[2] + 4 * bytesperpixel, ls_uv,
+ ref2->data[1], ref2->linesize[1],
+ ref2->data[2], ref2->linesize[2], tref2,
+ row << 2, (col << 3) + 4,
+ &uvmv, 4, 0, 8, 4, 4, 4, w2, h2, 1);
+ }
+ } else {
+ if (s->ss_h) {
+ w2 = (w2 + 1) >> 1;
+ uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[1][1]);
+ mc_chroma_dir(s, mc[4][b->filter][1],
+ s->dst[1], s->dst[2], ls_uv,
+ ref2->data[1], ref2->linesize[1],
+ ref2->data[2], ref2->linesize[2], tref2,
+ row << 3, col << 2,
+ &uvmv, 0, 0, 4, 8, 4, 4, w2, h2, 1);
+ // BUG libvpx uses wrong block index for 4:2:2 bs=4x4
+ // bottom block
+ // https://code.google.com/p/webm/issues/detail?id=993
+ uvmv = ROUNDED_DIV_MVx2(b->mv[1][1], b->mv[2][1]);
+ mc_chroma_dir(s, mc[4][b->filter][1],
+ s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv,
+ ref2->data[1], ref2->linesize[1],
+ ref2->data[2], ref2->linesize[2], tref2,
+ (row << 3) + 4, col << 2,
+ &uvmv, 0, 4, 4, 8, 4, 4, w2, h2, 1);
+ } else {
+ mc_chroma_dir(s, mc[4][b->filter][1],
+ s->dst[1], s->dst[2], ls_uv,
+ ref2->data[1], ref2->linesize[1],
+ ref2->data[2], ref2->linesize[2], tref2,
+ row << 3, col << 3,
+ &b->mv[0][1], 0, 0, 8, 8, 4, 4, w2, h2, 1);
+ mc_chroma_dir(s, mc[4][b->filter][1],
+ s->dst[1] + 4 * bytesperpixel,
+ s->dst[2] + 4 * bytesperpixel, ls_uv,
+ ref2->data[1], ref2->linesize[1],
+ ref2->data[2], ref2->linesize[2], tref2,
+ row << 3, (col << 3) + 4,
+ &b->mv[1][1], 4, 0, 8, 8, 4, 4, w2, h2, 1);
+ mc_chroma_dir(s, mc[4][b->filter][1],
+ s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv,
+ ref2->data[1], ref2->linesize[1],
+ ref2->data[2], ref2->linesize[2], tref2,
+ (row << 3) + 4, col << 3,
+ &b->mv[2][1], 0, 4, 8, 8, 4, 4, w2, h2, 1);
+ mc_chroma_dir(s, mc[4][b->filter][1],
+ s->dst[1] + 4 * ls_uv + 4 * bytesperpixel,
+ s->dst[2] + 4 * ls_uv + 4 * bytesperpixel, ls_uv,
+ ref2->data[1], ref2->linesize[1],
+ ref2->data[2], ref2->linesize[2], tref2,
+ (row << 3) + 4, (col << 3) + 4,
+ &b->mv[3][1], 4, 4, 8, 8, 4, 4, w2, h2, 1);
+ }
+ }
+ }
+ }
+ } else {
+ int bwl = bwlog_tab[0][b->bs];
+ int bw = bwh_tab[0][b->bs][0] * 4, bh = bwh_tab[0][b->bs][1] * 4;
+ int uvbw = bwh_tab[s->ss_h][b->bs][0] * 4, uvbh = bwh_tab[s->ss_v][b->bs][1] * 4;
+
+ mc_luma_dir(s, mc[bwl][b->filter][0], s->dst[0], ls_y,
+ ref1->data[0], ref1->linesize[0], tref1,
+ row << 3, col << 3, &b->mv[0][0], 0, 0, bw, bh, bw, bh, w1, h1, 0);
+ w1 = (w1 + s->ss_h) >> s->ss_h;
+ h1 = (h1 + s->ss_v) >> s->ss_v;
+ mc_chroma_dir(s, mc[bwl + s->ss_h][b->filter][0],
+ s->dst[1], s->dst[2], ls_uv,
+ ref1->data[1], ref1->linesize[1],
+ ref1->data[2], ref1->linesize[2], tref1,
+ row << (3 - s->ss_v), col << (3 - s->ss_h),
+ &b->mv[0][0], 0, 0, uvbw, uvbh, uvbw, uvbh, w1, h1, 0);
+
+ if (b->comp) {
+ mc_luma_dir(s, mc[bwl][b->filter][1], s->dst[0], ls_y,
+ ref2->data[0], ref2->linesize[0], tref2,
+ row << 3, col << 3, &b->mv[0][1], 0, 0, bw, bh, bw, bh, w2, h2, 1);
+ w2 = (w2 + s->ss_h) >> s->ss_h;
+ h2 = (h2 + s->ss_v) >> s->ss_v;
+ mc_chroma_dir(s, mc[bwl + s->ss_h][b->filter][1],
+ s->dst[1], s->dst[2], ls_uv,
+ ref2->data[1], ref2->linesize[1],
+ ref2->data[2], ref2->linesize[2], tref2,
+ row << (3 - s->ss_v), col << (3 - s->ss_h),
+ &b->mv[0][1], 0, 0, uvbw, uvbh, uvbw, uvbh, w2, h2, 1);
+ }
+ }
+}
diff --git a/media/ffvpx/libavcodec/vp9_parser.c b/media/ffvpx/libavcodec/vp9_parser.c
new file mode 100644
index 000000000..9900e7ab1
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp9_parser.c
@@ -0,0 +1,173 @@
+/*
+ * VP9 compatible video decoder
+ *
+ * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+ * Copyright (C) 2013 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/intreadwrite.h"
+#include "libavcodec/get_bits.h"
+#include "parser.h"
+
/* Persistent parser state, kept across parse() calls so the sub-frames of a
 * VP9 superframe can be returned to the caller one at a time. */
typedef struct VP9ParseContext {
    int n_frames; // 1-8; sub-frames of the current superframe still queued
    int size[8]; // byte size of each queued sub-frame (filled at descending indices, popped via size[--n_frames])
    int marker_size; // byte size of the superframe index trailing the last sub-frame
    int64_t pts; // pts carried over from an invisible frame to the next visible one
} VP9ParseContext;
+
+static int parse_frame(AVCodecParserContext *ctx, const uint8_t *buf, int size)
+{
+ VP9ParseContext *s = ctx->priv_data;
+ GetBitContext gb;
+ int res, profile, keyframe, invisible;
+
+ if ((res = init_get_bits8(&gb, buf, size)) < 0)
+ return res;
+ get_bits(&gb, 2); // frame marker
+ profile = get_bits1(&gb);
+ profile |= get_bits1(&gb) << 1;
+ if (profile == 3) profile += get_bits1(&gb);
+
+ if (get_bits1(&gb)) {
+ keyframe = 0;
+ invisible = 0;
+ } else {
+ keyframe = !get_bits1(&gb);
+ invisible = !get_bits1(&gb);
+ }
+
+ if (!keyframe) {
+ ctx->pict_type = AV_PICTURE_TYPE_P;
+ ctx->key_frame = 0;
+ } else {
+ ctx->pict_type = AV_PICTURE_TYPE_I;
+ ctx->key_frame = 1;
+ }
+
+ if (!invisible) {
+ if (ctx->pts == AV_NOPTS_VALUE)
+ ctx->pts = s->pts;
+ s->pts = AV_NOPTS_VALUE;
+ } else if (ctx->pts != AV_NOPTS_VALUE) {
+ s->pts = ctx->pts;
+ ctx->pts = AV_NOPTS_VALUE;
+ }
+
+ return 0;
+}
+
/**
 * AVCodecParser callback: split VP9 superframes into individual frames.
 *
 * A VP9 superframe is several frames concatenated and followed by an index;
 * the index's first and last byte is a marker whose top bits are 0b110.
 * When a superframe is detected the first frame is returned immediately and
 * the remaining frame sizes are queued in the context, so subsequent calls
 * can return them one by one without rescanning.
 *
 * @return the number of bytes of @data consumed by this call.
 */
static int parse(AVCodecParserContext *ctx,
                 AVCodecContext *avctx,
                 const uint8_t **out_data, int *out_size,
                 const uint8_t *data, int size)
{
    VP9ParseContext *s = ctx->priv_data;
    int full_size = size;
    int marker;

    if (size <= 0) {
        *out_size = 0;
        *out_data = data;

        return 0;
    }

    /* Sanity-check queued sub-frame sizes against the packet we were handed;
     * on mismatch drop the queue and fall through to a fresh scan. */
    if (s->n_frames > 0) {
        int i;
        int size_sum = 0;

        for (i = 0; i < s->n_frames ;i++)
            size_sum += s->size[i];
        size_sum += s->marker_size;

        if (size_sum != size) {
            av_log(avctx, AV_LOG_ERROR, "Inconsistent input frame sizes %d %d\n",
                   size_sum, size);
            s->n_frames = 0;
        }
    }

    /* Sub-frames queued from a previously split superframe: pop the next one. */
    if (s->n_frames > 0) {
        *out_data = data;
        *out_size = s->size[--s->n_frames];
        parse_frame(ctx, *out_data, *out_size);

        return s->n_frames > 0 ? *out_size : size /* i.e. include idx tail */;
    }

    /* Superframe detection: the index marker is the last byte of the packet
     * and is repeated at the start of the index for validation. */
    marker = data[size - 1];
    if ((marker & 0xe0) == 0xc0) {
        int nbytes = 1 + ((marker >> 3) & 0x3);   /* bytes per size entry (1-4) */
        int n_frames = 1 + (marker & 0x7), idx_sz = 2 + n_frames * nbytes;

        if (size >= idx_sz && data[size - idx_sz] == marker) {
            const uint8_t *idx = data + size + 1 - idx_sz;
            int first = 1;

            /* Read n_frames little-endian sizes of 'a' bytes each from the
             * index.  The first frame is returned now; the sizes of the
             * remaining frames are stored at descending indices of s->size[]
             * so that s->size[--s->n_frames] pops them in stream order. */
            switch (nbytes) {
#define case_n(a, rd) \
            case a: \
                while (n_frames--) { \
                    unsigned sz = rd; \
                    idx += a; \
                    if (sz == 0 || sz > size) { \
                        s->n_frames = 0; \
                        *out_size = size; \
                        *out_data = data; \
                        av_log(avctx, AV_LOG_ERROR, \
                               "Invalid superframe packet size: %u frame size: %d\n", \
                               sz, size); \
                        return full_size; \
                    } \
                    if (first) { \
                        first = 0; \
                        *out_data = data; \
                        *out_size = sz; \
                        s->n_frames = n_frames; \
                    } else { \
                        s->size[n_frames] = sz; \
                    } \
                    data += sz; \
                    size -= sz; \
                } \
                s->marker_size = size; \
                parse_frame(ctx, *out_data, *out_size); \
                return s->n_frames > 0 ? *out_size : full_size

            case_n(1, *idx);
            case_n(2, AV_RL16(idx));
            case_n(3, AV_RL24(idx));
            case_n(4, AV_RL32(idx));
            }
        }
    }

    /* Plain (non-superframe) packet: pass it through whole. */
    *out_data = data;
    *out_size = size;
    parse_frame(ctx, data, size);

    return size;
}
+
/* Parser registration entry; picked up by the parser list (see parser.h). */
AVCodecParser ff_vp9_parser = {
    .codec_ids = { AV_CODEC_ID_VP9 },
    .priv_data_size = sizeof(VP9ParseContext),
    .parser_parse = parse,
};
diff --git a/media/ffvpx/libavcodec/vp9data.h b/media/ffvpx/libavcodec/vp9data.h
new file mode 100644
index 000000000..cb12e7e94
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp9data.h
@@ -0,0 +1,2277 @@
+/*
+ * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+ * Copyright (C) 2013 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VP9DATA_H
+#define AVCODEC_VP9DATA_H
+
+#include <stdint.h>
+
+#include "vp9.h"
+
/* Decoding tree for the block-partition symbol: each node is a {left, right}
 * pair, non-negative entries index the next node and negated entries are
 * leaves (the decoded PARTITION_* value).  Bit strings are noted per leaf. */
static const int8_t vp9_partition_tree[3][2] = {
    { -PARTITION_NONE, 1 },               // '0'
    { -PARTITION_H, 2 },                  // '10'
    { -PARTITION_V, -PARTITION_SPLIT },   // '110', '111'
};
+
/* Default keyframe partition probabilities, indexed as
 * [block-size level][above/left split context][tree node].
 * Values are normative spec constants — do not modify. */
static const uint8_t vp9_default_kf_partition_probs[4][4][3] = {
    { /* 64x64 -> 32x32 */
        { 174, 35, 49 } /* a/l both not split */,
        { 68, 11, 27 } /* a split, l not split */,
        { 57, 15, 9 } /* l split, a not split */,
        { 12, 3, 3 } /* a/l both split */
    }, { /* 32x32 -> 16x16 */
        { 150, 40, 39 } /* a/l both not split */,
        { 78, 12, 26 } /* a split, l not split */,
        { 67, 33, 11 } /* l split, a not split */,
        { 24, 7, 5 } /* a/l both split */,
    }, { /* 16x16 -> 8x8 */
        { 149, 53, 53 } /* a/l both not split */,
        { 94, 20, 48 } /* a split, l not split */,
        { 83, 53, 24 } /* l split, a not split */,
        { 52, 18, 18 } /* a/l both split */,
    }, { /* 8x8 -> 4x4 */
        { 158, 97, 94 } /* a/l both not split */,
        { 93, 24, 99 } /* a split, l not split */,
        { 85, 119, 44 } /* l split, a not split */,
        { 62, 59, 67 } /* a/l both split */,
    },
};
+
/* Balanced 3-level decoding tree for segment ids 0-7; negated entries are
 * leaf values, non-negative entries index the next node. */
static const int8_t vp9_segmentation_tree[7][2] = {
    { 1, 2 },
    { 3, 4 },
    { 5, 6 },
    { -0, -1 }, // '00x'
    { -2, -3 }, // '01x'
    { -4, -5 }, // '10x'
    { -6, -7 }, // '11x'
};
+
/* Decoding tree for intra prediction modes; negated entries are leaf mode
 * values, non-negative entries index the next node.  Bit strings noted. */
static const int8_t vp9_intramode_tree[9][2] = {
    { -DC_PRED, 1 },                                  // '0'
    { -TM_VP8_PRED, 2 },                              // '10'
    { -VERT_PRED, 3 },                                // '110'
    { 4, 6 },
    { -HOR_PRED, 5 },                                 // '11100'
    { -DIAG_DOWN_RIGHT_PRED, -VERT_RIGHT_PRED },      // '11101x'
    { -DIAG_DOWN_LEFT_PRED, 7 },                      // '11110'
    { -VERT_LEFT_PRED, 8 },                           // '111110'
    { -HOR_DOWN_PRED, -HOR_UP_PRED },                 // '111111x'
};
+
+static const uint8_t vp9_default_kf_ymode_probs[10][10][9] = {
+ { /* above = v */
+ { 43, 46, 168, 134, 107, 128, 69, 142, 92 } /* left = v */,
+ { 44, 29, 68, 159, 201, 177, 50, 57, 77 } /* left = h */,
+ { 63, 36, 126, 146, 123, 158, 60, 90, 96 } /* left = dc */,
+ { 58, 38, 76, 114, 97, 172, 78, 133, 92 } /* left = d45 */,
+ { 46, 41, 76, 140, 63, 184, 69, 112, 57 } /* left = d135 */,
+ { 38, 32, 85, 140, 46, 112, 54, 151, 133 } /* left = d117 */,
+ { 39, 27, 61, 131, 110, 175, 44, 75, 136 } /* left = d153 */,
+ { 47, 35, 80, 100, 74, 143, 64, 163, 74 } /* left = d63 */,
+ { 52, 30, 74, 113, 130, 175, 51, 64, 58 } /* left = d27 */,
+ { 36, 61, 116, 114, 128, 162, 80, 125, 82 } /* left = tm */
+ }, { /* above = h */
+ { 55, 44, 68, 166, 179, 192, 57, 57, 108 } /* left = v */,
+ { 42, 26, 11, 199, 241, 228, 23, 15, 85 } /* left = h */,
+ { 82, 26, 26, 171, 208, 204, 44, 32, 105 } /* left = dc */,
+ { 68, 42, 19, 131, 160, 199, 55, 52, 83 } /* left = d45 */,
+ { 58, 50, 25, 139, 115, 232, 39, 52, 118 } /* left = d135 */,
+ { 50, 35, 33, 153, 104, 162, 64, 59, 131 } /* left = d117 */,
+ { 44, 24, 16, 150, 177, 202, 33, 19, 156 } /* left = d153 */,
+ { 53, 49, 21, 110, 116, 168, 59, 80, 76 } /* left = d63 */,
+ { 55, 27, 12, 153, 203, 218, 26, 27, 49 } /* left = d27 */,
+ { 38, 72, 19, 168, 203, 212, 50, 50, 107 } /* left = tm */
+ }, { /* above = dc */
+ { 92, 45, 102, 136, 116, 180, 74, 90, 100 } /* left = v */,
+ { 73, 32, 19, 187, 222, 215, 46, 34, 100 } /* left = h */,
+ { 137, 30, 42, 148, 151, 207, 70, 52, 91 } /* left = dc */,
+ { 91, 30, 32, 116, 121, 186, 93, 86, 94 } /* left = d45 */,
+ { 72, 35, 36, 149, 68, 206, 68, 63, 105 } /* left = d135 */,
+ { 73, 31, 28, 138, 57, 124, 55, 122, 151 } /* left = d117 */,
+ { 67, 23, 21, 140, 126, 197, 40, 37, 171 } /* left = d153 */,
+ { 74, 32, 27, 107, 86, 160, 63, 134, 102 } /* left = d63 */,
+ { 86, 27, 28, 128, 154, 212, 45, 43, 53 } /* left = d27 */,
+ { 59, 67, 44, 140, 161, 202, 78, 67, 119 } /* left = tm */
+ }, { /* above = d45 */
+ { 59, 38, 83, 112, 103, 162, 98, 136, 90 } /* left = v */,
+ { 62, 30, 23, 158, 200, 207, 59, 57, 50 } /* left = h */,
+ { 103, 26, 36, 129, 132, 201, 83, 80, 93 } /* left = dc */,
+ { 67, 30, 29, 84, 86, 191, 102, 91, 59 } /* left = d45 */,
+ { 60, 32, 33, 112, 71, 220, 64, 89, 104 } /* left = d135 */,
+ { 53, 26, 34, 130, 56, 149, 84, 120, 103 } /* left = d117 */,
+ { 53, 21, 23, 133, 109, 210, 56, 77, 172 } /* left = d153 */,
+ { 61, 29, 29, 93, 97, 165, 83, 175, 162 } /* left = d63 */,
+ { 77, 19, 29, 112, 142, 228, 55, 66, 36 } /* left = d27 */,
+ { 47, 47, 43, 114, 137, 181, 100, 99, 95 } /* left = tm */
+ }, { /* above = d135 */
+ { 53, 40, 55, 139, 69, 183, 61, 80, 110 } /* left = v */,
+ { 40, 29, 19, 161, 180, 207, 43, 24, 91 } /* left = h */,
+ { 69, 23, 29, 128, 83, 199, 46, 44, 101 } /* left = dc */,
+ { 60, 34, 19, 105, 61, 198, 53, 64, 89 } /* left = d45 */,
+ { 52, 31, 22, 158, 40, 209, 58, 62, 89 } /* left = d135 */,
+ { 44, 31, 29, 147, 46, 158, 56, 102, 198 } /* left = d117 */,
+ { 35, 19, 12, 135, 87, 209, 41, 45, 167 } /* left = d153 */,
+ { 51, 38, 25, 113, 58, 164, 70, 93, 97 } /* left = d63 */,
+ { 55, 25, 21, 118, 95, 215, 38, 39, 66 } /* left = d27 */,
+ { 47, 54, 34, 146, 108, 203, 72, 103, 151 } /* left = tm */
+ }, { /* above = d117 */
+ { 46, 27, 80, 150, 55, 124, 55, 121, 135 } /* left = v */,
+ { 36, 23, 27, 165, 149, 166, 54, 64, 118 } /* left = h */,
+ { 64, 19, 37, 156, 66, 138, 49, 95, 133 } /* left = dc */,
+ { 53, 21, 36, 131, 63, 163, 60, 109, 81 } /* left = d45 */,
+ { 40, 26, 35, 154, 40, 185, 51, 97, 123 } /* left = d135 */,
+ { 35, 19, 34, 179, 19, 97, 48, 129, 124 } /* left = d117 */,
+ { 36, 20, 26, 136, 62, 164, 33, 77, 154 } /* left = d153 */,
+ { 45, 26, 28, 129, 45, 129, 49, 147, 123 } /* left = d63 */,
+ { 45, 18, 32, 130, 90, 157, 40, 79, 91 } /* left = d27 */,
+ { 38, 44, 51, 136, 74, 162, 57, 97, 121 } /* left = tm */
+ }, { /* above = d153 */
+ { 56, 39, 58, 133, 117, 173, 48, 53, 187 } /* left = v */,
+ { 35, 21, 12, 161, 212, 207, 20, 23, 145 } /* left = h */,
+ { 75, 17, 22, 136, 138, 185, 32, 34, 166 } /* left = dc */,
+ { 56, 29, 19, 117, 109, 181, 55, 68, 112 } /* left = d45 */,
+ { 47, 29, 17, 153, 64, 220, 59, 51, 114 } /* left = d135 */,
+ { 46, 16, 24, 136, 76, 147, 41, 64, 172 } /* left = d117 */,
+ { 34, 17, 11, 108, 152, 187, 13, 15, 209 } /* left = d153 */,
+ { 55, 30, 18, 122, 79, 179, 44, 88, 116 } /* left = d63 */,
+ { 51, 24, 14, 115, 133, 209, 32, 26, 104 } /* left = d27 */,
+ { 37, 49, 25, 129, 168, 164, 41, 54, 148 } /* left = tm */
+ }, { /* above = d63 */
+ { 48, 34, 86, 101, 92, 146, 78, 179, 134 } /* left = v */,
+ { 47, 22, 24, 138, 187, 178, 68, 69, 59 } /* left = h */,
+ { 78, 23, 39, 111, 117, 170, 74, 124, 94 } /* left = dc */,
+ { 56, 25, 33, 105, 112, 187, 95, 177, 129 } /* left = d45 */,
+ { 48, 31, 27, 114, 63, 183, 82, 116, 56 } /* left = d135 */,
+ { 43, 28, 37, 121, 63, 123, 61, 192, 169 } /* left = d117 */,
+ { 42, 17, 24, 109, 97, 177, 56, 76, 122 } /* left = d153 */,
+ { 46, 23, 32, 74, 86, 150, 67, 183, 88 } /* left = d63 */,
+ { 58, 18, 28, 105, 139, 182, 70, 92, 63 } /* left = d27 */,
+ { 36, 38, 48, 92, 122, 165, 88, 137, 91 } /* left = tm */
+ }, { /* above = d27 */
+ { 62, 44, 61, 123, 105, 189, 48, 57, 64 } /* left = v */,
+ { 47, 25, 17, 175, 222, 220, 24, 30, 86 } /* left = h */,
+ { 82, 22, 32, 127, 143, 213, 39, 41, 70 } /* left = dc */,
+ { 68, 36, 17, 106, 102, 206, 59, 74, 74 } /* left = d45 */,
+ { 57, 39, 23, 151, 68, 216, 55, 63, 58 } /* left = d135 */,
+ { 49, 30, 35, 141, 70, 168, 82, 40, 115 } /* left = d117 */,
+ { 51, 25, 15, 136, 129, 202, 38, 35, 139 } /* left = d153 */,
+ { 59, 39, 19, 114, 75, 180, 77, 104, 42 } /* left = d63 */,
+ { 68, 26, 16, 111, 141, 215, 29, 28, 28 } /* left = d27 */,
+ { 40, 61, 26, 126, 152, 206, 61, 59, 93 } /* left = tm */
+ }, { /* above = tm */
+ { 44, 78, 115, 132, 119, 173, 71, 112, 93 } /* left = v */,
+ { 39, 38, 21, 184, 227, 206, 42, 32, 64 } /* left = h */,
+ { 65, 70, 60, 155, 159, 199, 61, 60, 81 } /* left = dc */,
+ { 58, 47, 36, 124, 137, 193, 80, 82, 78 } /* left = d45 */,
+ { 49, 50, 35, 144, 95, 205, 63, 78, 59 } /* left = d135 */,
+ { 41, 53, 52, 148, 71, 142, 65, 128, 51 } /* left = d117 */,
+ { 40, 36, 28, 143, 143, 202, 40, 55, 137 } /* left = d153 */,
+ { 42, 44, 44, 104, 105, 164, 64, 130, 80 } /* left = d63 */,
+ { 52, 34, 29, 129, 183, 227, 42, 35, 43 } /* left = d27 */,
+ { 43, 81, 53, 140, 169, 204, 68, 84, 72 } /* left = tm */
+ }
+};
+
/* Default keyframe chroma intra-mode probabilities, indexed by the luma
 * prediction mode; one row of 9 tree-node probabilities per luma mode.
 * Normative spec constants — do not modify. */
static const uint8_t vp9_default_kf_uvmode_probs[10][9] = {
    { 118, 15, 123, 148, 131, 101, 44, 93, 131 } /* y = v */,
    { 113, 12, 23, 188, 226, 142, 26, 32, 125 } /* y = h */,
    { 144, 11, 54, 157, 195, 130, 46, 58, 108 } /* y = dc */,
    { 120, 11, 50, 123, 163, 135, 64, 77, 103 } /* y = d45 */,
    { 113, 9, 36, 155, 111, 157, 32, 44, 161 } /* y = d135 */,
    { 116, 9, 55, 176, 76, 96, 37, 61, 149 } /* y = d117 */,
    { 115, 9, 28, 141, 161, 167, 21, 25, 193 } /* y = d153 */,
    { 116, 12, 64, 120, 140, 125, 49, 115, 121 } /* y = d63 */,
    { 120, 12, 32, 145, 195, 142, 32, 38, 86 } /* y = d27 */,
    { 102, 19, 66, 162, 182, 122, 35, 59, 128 } /* y = tm */
};
+
/* Decoding tree for inter prediction modes; negated entries are leaf mode
 * values, non-negative entries index the next node. */
static const int8_t vp9_inter_mode_tree[3][2] = {
    { -ZEROMV, 1 },          // '0'
    { -NEARESTMV, 2 },       // '10'
    { -NEARMV, -NEWMV },     // '11x'
};
+
/* Decoding tree for the interpolation-filter index (0-2); negated entries
 * are leaves, used together with vp9_filter_lut below. */
static const int8_t vp9_filter_tree[2][2] = {
    { -0, 1 },    // '0'
    { -1, -2 },   // '1x'
};
+
/* Maps the coded filter index (leaf of vp9_filter_tree) to a FilterMode. */
static const enum FilterMode vp9_filter_lut[3] = {
    FILTER_8TAP_REGULAR,
    FILTER_8TAP_SMOOTH,
    FILTER_8TAP_SHARP,
};
+
+static const int16_t vp9_dc_qlookup[3][256] = {
+ {
+ 4, 8, 8, 9, 10, 11, 12, 12,
+ 13, 14, 15, 16, 17, 18, 19, 19,
+ 20, 21, 22, 23, 24, 25, 26, 26,
+ 27, 28, 29, 30, 31, 32, 32, 33,
+ 34, 35, 36, 37, 38, 38, 39, 40,
+ 41, 42, 43, 43, 44, 45, 46, 47,
+ 48, 48, 49, 50, 51, 52, 53, 53,
+ 54, 55, 56, 57, 57, 58, 59, 60,
+ 61, 62, 62, 63, 64, 65, 66, 66,
+ 67, 68, 69, 70, 70, 71, 72, 73,
+ 74, 74, 75, 76, 77, 78, 78, 79,
+ 80, 81, 81, 82, 83, 84, 85, 85,
+ 87, 88, 90, 92, 93, 95, 96, 98,
+ 99, 101, 102, 104, 105, 107, 108, 110,
+ 111, 113, 114, 116, 117, 118, 120, 121,
+ 123, 125, 127, 129, 131, 134, 136, 138,
+ 140, 142, 144, 146, 148, 150, 152, 154,
+ 156, 158, 161, 164, 166, 169, 172, 174,
+ 177, 180, 182, 185, 187, 190, 192, 195,
+ 199, 202, 205, 208, 211, 214, 217, 220,
+ 223, 226, 230, 233, 237, 240, 243, 247,
+ 250, 253, 257, 261, 265, 269, 272, 276,
+ 280, 284, 288, 292, 296, 300, 304, 309,
+ 313, 317, 322, 326, 330, 335, 340, 344,
+ 349, 354, 359, 364, 369, 374, 379, 384,
+ 389, 395, 400, 406, 411, 417, 423, 429,
+ 435, 441, 447, 454, 461, 467, 475, 482,
+ 489, 497, 505, 513, 522, 530, 539, 549,
+ 559, 569, 579, 590, 602, 614, 626, 640,
+ 654, 668, 684, 700, 717, 736, 755, 775,
+ 796, 819, 843, 869, 896, 925, 955, 988,
+ 1022, 1058, 1098, 1139, 1184, 1232, 1282, 1336,
+ }, {
+ 4, 9, 10, 13, 15, 17, 20, 22,
+ 25, 28, 31, 34, 37, 40, 43, 47,
+ 50, 53, 57, 60, 64, 68, 71, 75,
+ 78, 82, 86, 90, 93, 97, 101, 105,
+ 109, 113, 116, 120, 124, 128, 132, 136,
+ 140, 143, 147, 151, 155, 159, 163, 166,
+ 170, 174, 178, 182, 185, 189, 193, 197,
+ 200, 204, 208, 212, 215, 219, 223, 226,
+ 230, 233, 237, 241, 244, 248, 251, 255,
+ 259, 262, 266, 269, 273, 276, 280, 283,
+ 287, 290, 293, 297, 300, 304, 307, 310,
+ 314, 317, 321, 324, 327, 331, 334, 337,
+ 343, 350, 356, 362, 369, 375, 381, 387,
+ 394, 400, 406, 412, 418, 424, 430, 436,
+ 442, 448, 454, 460, 466, 472, 478, 484,
+ 490, 499, 507, 516, 525, 533, 542, 550,
+ 559, 567, 576, 584, 592, 601, 609, 617,
+ 625, 634, 644, 655, 666, 676, 687, 698,
+ 708, 718, 729, 739, 749, 759, 770, 782,
+ 795, 807, 819, 831, 844, 856, 868, 880,
+ 891, 906, 920, 933, 947, 961, 975, 988,
+ 1001, 1015, 1030, 1045, 1061, 1076, 1090, 1105,
+ 1120, 1137, 1153, 1170, 1186, 1202, 1218, 1236,
+ 1253, 1271, 1288, 1306, 1323, 1342, 1361, 1379,
+ 1398, 1416, 1436, 1456, 1476, 1496, 1516, 1537,
+ 1559, 1580, 1601, 1624, 1647, 1670, 1692, 1717,
+ 1741, 1766, 1791, 1817, 1844, 1871, 1900, 1929,
+ 1958, 1990, 2021, 2054, 2088, 2123, 2159, 2197,
+ 2236, 2276, 2319, 2363, 2410, 2458, 2508, 2561,
+ 2616, 2675, 2737, 2802, 2871, 2944, 3020, 3102,
+ 3188, 3280, 3375, 3478, 3586, 3702, 3823, 3953,
+ 4089, 4236, 4394, 4559, 4737, 4929, 5130, 5347,
+ }, {
+ 4, 12, 18, 25, 33, 41, 50, 60,
+ 70, 80, 91, 103, 115, 127, 140, 153,
+ 166, 180, 194, 208, 222, 237, 251, 266,
+ 281, 296, 312, 327, 343, 358, 374, 390,
+ 405, 421, 437, 453, 469, 484, 500, 516,
+ 532, 548, 564, 580, 596, 611, 627, 643,
+ 659, 674, 690, 706, 721, 737, 752, 768,
+ 783, 798, 814, 829, 844, 859, 874, 889,
+ 904, 919, 934, 949, 964, 978, 993, 1008,
+ 1022, 1037, 1051, 1065, 1080, 1094, 1108, 1122,
+ 1136, 1151, 1165, 1179, 1192, 1206, 1220, 1234,
+ 1248, 1261, 1275, 1288, 1302, 1315, 1329, 1342,
+ 1368, 1393, 1419, 1444, 1469, 1494, 1519, 1544,
+ 1569, 1594, 1618, 1643, 1668, 1692, 1717, 1741,
+ 1765, 1789, 1814, 1838, 1862, 1885, 1909, 1933,
+ 1957, 1992, 2027, 2061, 2096, 2130, 2165, 2199,
+ 2233, 2267, 2300, 2334, 2367, 2400, 2434, 2467,
+ 2499, 2532, 2575, 2618, 2661, 2704, 2746, 2788,
+ 2830, 2872, 2913, 2954, 2995, 3036, 3076, 3127,
+ 3177, 3226, 3275, 3324, 3373, 3421, 3469, 3517,
+ 3565, 3621, 3677, 3733, 3788, 3843, 3897, 3951,
+ 4005, 4058, 4119, 4181, 4241, 4301, 4361, 4420,
+ 4479, 4546, 4612, 4677, 4742, 4807, 4871, 4942,
+ 5013, 5083, 5153, 5222, 5291, 5367, 5442, 5517,
+ 5591, 5665, 5745, 5825, 5905, 5984, 6063, 6149,
+ 6234, 6319, 6404, 6495, 6587, 6678, 6769, 6867,
+ 6966, 7064, 7163, 7269, 7376, 7483, 7599, 7715,
+ 7832, 7958, 8085, 8214, 8352, 8492, 8635, 8788,
+ 8945, 9104, 9275, 9450, 9639, 9832, 10031, 10245,
+ 10465, 10702, 10946, 11210, 11482, 11776, 12081, 12409,
+ 12750, 13118, 13501, 13913, 14343, 14807, 15290, 15812,
+ 16356, 16943, 17575, 18237, 18949, 19718, 20521, 21387,
+ }
+};
+
+static const int16_t vp9_ac_qlookup[3][256] = {
+ {
+ 4, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22,
+ 23, 24, 25, 26, 27, 28, 29, 30,
+ 31, 32, 33, 34, 35, 36, 37, 38,
+ 39, 40, 41, 42, 43, 44, 45, 46,
+ 47, 48, 49, 50, 51, 52, 53, 54,
+ 55, 56, 57, 58, 59, 60, 61, 62,
+ 63, 64, 65, 66, 67, 68, 69, 70,
+ 71, 72, 73, 74, 75, 76, 77, 78,
+ 79, 80, 81, 82, 83, 84, 85, 86,
+ 87, 88, 89, 90, 91, 92, 93, 94,
+ 95, 96, 97, 98, 99, 100, 101, 102,
+ 104, 106, 108, 110, 112, 114, 116, 118,
+ 120, 122, 124, 126, 128, 130, 132, 134,
+ 136, 138, 140, 142, 144, 146, 148, 150,
+ 152, 155, 158, 161, 164, 167, 170, 173,
+ 176, 179, 182, 185, 188, 191, 194, 197,
+ 200, 203, 207, 211, 215, 219, 223, 227,
+ 231, 235, 239, 243, 247, 251, 255, 260,
+ 265, 270, 275, 280, 285, 290, 295, 300,
+ 305, 311, 317, 323, 329, 335, 341, 347,
+ 353, 359, 366, 373, 380, 387, 394, 401,
+ 408, 416, 424, 432, 440, 448, 456, 465,
+ 474, 483, 492, 501, 510, 520, 530, 540,
+ 550, 560, 571, 582, 593, 604, 615, 627,
+ 639, 651, 663, 676, 689, 702, 715, 729,
+ 743, 757, 771, 786, 801, 816, 832, 848,
+ 864, 881, 898, 915, 933, 951, 969, 988,
+ 1007, 1026, 1046, 1066, 1087, 1108, 1129, 1151,
+ 1173, 1196, 1219, 1243, 1267, 1292, 1317, 1343,
+ 1369, 1396, 1423, 1451, 1479, 1508, 1537, 1567,
+ 1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828,
+ }, {
+ 4, 9, 11, 13, 16, 18, 21, 24,
+ 27, 30, 33, 37, 40, 44, 48, 51,
+ 55, 59, 63, 67, 71, 75, 79, 83,
+ 88, 92, 96, 100, 105, 109, 114, 118,
+ 122, 127, 131, 136, 140, 145, 149, 154,
+ 158, 163, 168, 172, 177, 181, 186, 190,
+ 195, 199, 204, 208, 213, 217, 222, 226,
+ 231, 235, 240, 244, 249, 253, 258, 262,
+ 267, 271, 275, 280, 284, 289, 293, 297,
+ 302, 306, 311, 315, 319, 324, 328, 332,
+ 337, 341, 345, 349, 354, 358, 362, 367,
+ 371, 375, 379, 384, 388, 392, 396, 401,
+ 409, 417, 425, 433, 441, 449, 458, 466,
+ 474, 482, 490, 498, 506, 514, 523, 531,
+ 539, 547, 555, 563, 571, 579, 588, 596,
+ 604, 616, 628, 640, 652, 664, 676, 688,
+ 700, 713, 725, 737, 749, 761, 773, 785,
+ 797, 809, 825, 841, 857, 873, 889, 905,
+ 922, 938, 954, 970, 986, 1002, 1018, 1038,
+ 1058, 1078, 1098, 1118, 1138, 1158, 1178, 1198,
+ 1218, 1242, 1266, 1290, 1314, 1338, 1362, 1386,
+ 1411, 1435, 1463, 1491, 1519, 1547, 1575, 1603,
+ 1631, 1663, 1695, 1727, 1759, 1791, 1823, 1859,
+ 1895, 1931, 1967, 2003, 2039, 2079, 2119, 2159,
+ 2199, 2239, 2283, 2327, 2371, 2415, 2459, 2507,
+ 2555, 2603, 2651, 2703, 2755, 2807, 2859, 2915,
+ 2971, 3027, 3083, 3143, 3203, 3263, 3327, 3391,
+ 3455, 3523, 3591, 3659, 3731, 3803, 3876, 3952,
+ 4028, 4104, 4184, 4264, 4348, 4432, 4516, 4604,
+ 4692, 4784, 4876, 4972, 5068, 5168, 5268, 5372,
+ 5476, 5584, 5692, 5804, 5916, 6032, 6148, 6268,
+ 6388, 6512, 6640, 6768, 6900, 7036, 7172, 7312,
+ }, {
+ 4, 13, 19, 27, 35, 44, 54, 64,
+ 75, 87, 99, 112, 126, 139, 154, 168,
+ 183, 199, 214, 230, 247, 263, 280, 297,
+ 314, 331, 349, 366, 384, 402, 420, 438,
+ 456, 475, 493, 511, 530, 548, 567, 586,
+ 604, 623, 642, 660, 679, 698, 716, 735,
+ 753, 772, 791, 809, 828, 846, 865, 884,
+ 902, 920, 939, 957, 976, 994, 1012, 1030,
+ 1049, 1067, 1085, 1103, 1121, 1139, 1157, 1175,
+ 1193, 1211, 1229, 1246, 1264, 1282, 1299, 1317,
+ 1335, 1352, 1370, 1387, 1405, 1422, 1440, 1457,
+ 1474, 1491, 1509, 1526, 1543, 1560, 1577, 1595,
+ 1627, 1660, 1693, 1725, 1758, 1791, 1824, 1856,
+ 1889, 1922, 1954, 1987, 2020, 2052, 2085, 2118,
+ 2150, 2183, 2216, 2248, 2281, 2313, 2346, 2378,
+ 2411, 2459, 2508, 2556, 2605, 2653, 2701, 2750,
+ 2798, 2847, 2895, 2943, 2992, 3040, 3088, 3137,
+ 3185, 3234, 3298, 3362, 3426, 3491, 3555, 3619,
+ 3684, 3748, 3812, 3876, 3941, 4005, 4069, 4149,
+ 4230, 4310, 4390, 4470, 4550, 4631, 4711, 4791,
+ 4871, 4967, 5064, 5160, 5256, 5352, 5448, 5544,
+ 5641, 5737, 5849, 5961, 6073, 6185, 6297, 6410,
+ 6522, 6650, 6778, 6906, 7034, 7162, 7290, 7435,
+ 7579, 7723, 7867, 8011, 8155, 8315, 8475, 8635,
+ 8795, 8956, 9132, 9308, 9484, 9660, 9836, 10028,
+ 10220, 10412, 10604, 10812, 11020, 11228, 11437, 11661,
+ 11885, 12109, 12333, 12573, 12813, 13053, 13309, 13565,
+ 13821, 14093, 14365, 14637, 14925, 15213, 15502, 15806,
+ 16110, 16414, 16734, 17054, 17390, 17726, 18062, 18414,
+ 18766, 19134, 19502, 19886, 20270, 20670, 21070, 21486,
+ 21902, 22334, 22766, 23214, 23662, 24126, 24590, 25070,
+ 25551, 26047, 26559, 27071, 27599, 28143, 28687, 29247,
+ }
+};
+
/* Transform-type selection per prediction mode: intra modes pick a
 * DCT/ADST combination per axis; all inter modes (NEARESTMV..NEWMV) use
 * plain DCT_DCT. */
static const enum TxfmType vp9_intra_txfm_type[14] = {
    [VERT_PRED]            = ADST_DCT,
    [HOR_PRED]             = DCT_ADST,
    [DC_PRED]              = DCT_DCT,
    [DIAG_DOWN_LEFT_PRED]  = DCT_DCT,
    [DIAG_DOWN_RIGHT_PRED] = ADST_ADST,
    [VERT_RIGHT_PRED]      = ADST_DCT,
    [HOR_DOWN_PRED]        = DCT_ADST,
    [VERT_LEFT_PRED]       = ADST_DCT,
    [HOR_UP_PRED]          = DCT_ADST,
    [TM_VP8_PRED]          = ADST_ADST,
    [NEARESTMV]            = DCT_DCT,
    [NEARMV]               = DCT_DCT,
    [ZEROMV]               = DCT_DCT,
    [NEWMV]                = DCT_DCT,
};
+
/* 4x4 coefficient scan order (default/zig-zag variant); entries are raster
 * positions within the block.  Normative spec constants — do not modify. */
static const int16_t vp9_default_scan_4x4[16] = {
    0, 1, 4, 5,
    2, 8, 3, 6,
    12, 9, 7, 10,
    13, 11, 14, 15,
};
+
/* 4x4 coefficient scan order, column-biased variant. */
static const int16_t vp9_col_scan_4x4[16] = {
    0, 1, 2, 4,
    3, 5, 6, 8,
    7, 9, 10, 12,
    13, 11, 14, 15,
};
+
/* 4x4 coefficient scan order, row-biased variant. */
static const int16_t vp9_row_scan_4x4[16] = {
    0, 4, 1, 8,
    5, 12, 9, 2,
    6, 13, 3, 10,
    7, 14, 11, 15,
};
+
+static const int16_t vp9_default_scan_8x8[64] = {
+ 0, 1, 8, 2, 9, 16, 10, 3,
+ 17, 24, 18, 11, 4, 25, 32, 19,
+ 12, 26, 5, 33, 20, 27, 40, 13,
+ 34, 6, 41, 28, 21, 35, 42, 48,
+ 14, 7, 36, 29, 43, 56, 49, 22,
+ 15, 37, 50, 44, 57, 30, 23, 51,
+ 45, 58, 38, 31, 52, 59, 39, 46,
+ 53, 60, 47, 54, 61, 55, 62, 63,
+};
+
+static const int16_t vp9_col_scan_8x8[64] = {
+ 0, 1, 2, 8, 3, 9, 4, 10,
+ 16, 5, 11, 17, 12, 18, 6, 24,
+ 19, 13, 25, 7, 26, 20, 32, 14,
+ 27, 21, 33, 28, 34, 15, 22, 35,
+ 40, 29, 41, 36, 23, 30, 42, 37,
+ 48, 43, 31, 44, 49, 38, 50, 56,
+ 45, 39, 51, 57, 52, 46, 58, 53,
+ 59, 47, 60, 54, 61, 55, 62, 63,
+};
+
+static const int16_t vp9_row_scan_8x8[64] = {
+ 0, 8, 16, 1, 9, 24, 2, 17,
+ 32, 10, 25, 3, 40, 18, 11, 33,
+ 26, 19, 4, 48, 41, 34, 12, 27,
+ 56, 20, 5, 42, 35, 13, 49, 28,
+ 6, 21, 43, 36, 14, 50, 29, 57,
+ 7, 44, 22, 37, 51, 15, 58, 30,
+ 23, 45, 52, 38, 59, 31, 46, 53,
+ 39, 60, 47, 61, 54, 62, 55, 63,
+};
+
+static const int16_t vp9_default_scan_16x16[256] = {
+ 0, 1, 16, 2, 17, 32, 3, 18, 33, 48, 4, 34, 19, 49, 20, 5,
+ 35, 64, 50, 36, 65, 21, 6, 51, 80, 66, 37, 22, 52, 7, 81, 67,
+ 38, 82, 53, 23, 96, 68, 8, 83, 97, 54, 39, 69, 112, 24, 98, 84,
+ 70, 55, 9, 40, 85, 99, 113, 128, 25, 114, 100, 71, 86, 56, 10, 41,
+ 115, 101, 129, 116, 72, 87, 26, 130, 144, 102, 57, 11, 42, 117, 131, 145,
+ 88, 103, 27, 73, 132, 118, 146, 58, 160, 12, 43, 133, 147, 104, 89, 119,
+ 161, 74, 148, 134, 28, 162, 59, 13, 176, 120, 149, 90, 135, 105, 163, 44,
+ 75, 177, 164, 29, 150, 121, 136, 178, 165, 14, 106, 60, 91, 151, 45, 179,
+ 192, 137, 166, 122, 76, 180, 152, 30, 61, 15, 107, 167, 181, 193, 92, 208,
+ 46, 138, 123, 153, 194, 77, 168, 182, 31, 195, 209, 183, 108, 139, 62, 154,
+ 47, 196, 93, 169, 210, 197, 224, 124, 184, 211, 78, 109, 170, 155, 63, 198,
+ 212, 185, 225, 240, 140, 94, 199, 125, 79, 213, 226, 171, 186, 156, 214, 200,
+ 110, 227, 141, 95, 241, 215, 228, 201, 126, 242, 187, 172, 157, 229, 111, 216,
+ 243, 142, 202, 230, 127, 217, 244, 173, 188, 231, 158, 203, 143, 245, 218, 232,
+ 189, 246, 159, 174, 233, 247, 219, 204, 175, 190, 248, 234, 205, 220, 249, 191,
+ 235, 221, 250, 206, 222, 251, 236, 207, 237, 223, 252, 238, 253, 239, 254, 255,
+};
+
+static const int16_t vp9_col_scan_16x16[256] = {
+ 0, 1, 2, 3, 16, 4, 17, 5, 18, 6, 19, 32, 20, 7, 33, 21,
+ 34, 8, 35, 22, 48, 36, 9, 49, 23, 50, 37, 10, 38, 51, 24, 64,
+ 52, 11, 65, 39, 25, 53, 66, 54, 40, 67, 12, 80, 26, 68, 55, 81,
+ 41, 69, 13, 27, 82, 56, 70, 83, 42, 14, 84, 96, 71, 28, 57, 85,
+ 97, 15, 72, 98, 43, 86, 58, 99, 29, 87, 100, 112, 73, 44, 101, 59,
+ 30, 113, 88, 114, 74, 128, 102, 45, 31, 115, 60, 103, 89, 116, 75, 129,
+ 117, 46, 104, 90, 61, 130, 118, 131, 132, 105, 76, 47, 119, 144, 91, 62,
+ 133, 106, 145, 120, 146, 134, 77, 147, 121, 92, 135, 148, 63, 107, 136, 122,
+ 93, 149, 160, 78, 150, 137, 108, 161, 162, 151, 123, 79, 138, 163, 152, 94,
+ 164, 109, 165, 153, 124, 139, 176, 166, 95, 177, 167, 110, 154, 178, 125, 179,
+ 140, 168, 155, 111, 180, 192, 181, 169, 141, 126, 182, 193, 194, 156, 183, 170,
+ 195, 127, 142, 196, 184, 208, 197, 157, 171, 143, 185, 198, 209, 199, 210, 172,
+ 158, 186, 211, 224, 212, 200, 240, 159, 213, 225, 187, 201, 173, 226, 214, 215,
+ 227, 202, 228, 188, 241, 216, 174, 229, 242, 203, 243, 217, 230, 175, 189, 244,
+ 231, 204, 218, 232, 245, 219, 246, 190, 233, 205, 191, 247, 234, 248, 220, 206,
+ 249, 235, 221, 207, 250, 236, 222, 251, 223, 237, 238, 252, 239, 253, 254, 255,
+};
+
+static const int16_t vp9_row_scan_16x16[256] = {
+ 0, 16, 32, 1, 48, 17, 64, 33, 2, 80, 18, 49, 96, 34, 3, 65,
+ 19, 112, 50, 81, 35, 4, 128, 66, 20, 97, 51, 82, 5, 144, 36, 67,
+ 113, 98, 21, 52, 160, 83, 129, 37, 68, 6, 114, 176, 99, 53, 22, 84,
+ 145, 38, 69, 130, 7, 115, 192, 100, 54, 23, 85, 161, 146, 131, 39, 70,
+ 208, 116, 8, 101, 177, 55, 86, 24, 162, 147, 132, 71, 224, 117, 40, 102,
+ 9, 148, 56, 87, 193, 163, 240, 133, 178, 25, 118, 72, 41, 103, 164, 10,
+ 149, 88, 134, 209, 179, 57, 119, 194, 26, 73, 165, 150, 104, 42, 135, 11,
+ 180, 120, 89, 225, 195, 58, 27, 210, 151, 181, 166, 74, 43, 105, 12, 136,
+ 90, 59, 241, 121, 28, 196, 167, 211, 152, 44, 182, 137, 75, 13, 226, 106,
+ 122, 60, 197, 91, 168, 29, 183, 153, 14, 76, 212, 138, 45, 107, 15, 198,
+ 92, 227, 169, 30, 123, 154, 61, 242, 184, 213, 139, 46, 77, 31, 108, 170,
+ 199, 185, 124, 228, 93, 155, 214, 62, 140, 243, 78, 47, 200, 109, 186, 171,
+ 201, 94, 63, 215, 229, 156, 79, 125, 141, 110, 216, 187, 172, 244, 202, 230,
+ 217, 95, 157, 126, 245, 111, 142, 231, 188, 127, 158, 218, 173, 232, 246, 233,
+ 203, 143, 247, 174, 189, 159, 219, 204, 248, 234, 249, 175, 190, 220, 205, 250,
+ 235, 191, 221, 251, 236, 206, 252, 222, 207, 237, 223, 253, 238, 254, 239, 255,
+};
+
+static const int16_t vp9_default_scan_32x32[1024] = {
+ 0, 1, 32, 2, 33, 64, 3, 34, 65, 4, 96, 35, 66, 5, 36, 97, 67, 128, 98, 68, 37, 6, 129, 99, 7, 160, 69, 38, 130, 100, 161, 131,
+ 39, 70, 8, 101, 162, 132, 192, 71, 40, 9, 102, 163, 133, 193, 72, 224, 103, 41, 164, 10, 194, 134, 165, 73, 104, 135, 225, 42, 195, 11, 256, 166,
+ 226, 196, 74, 105, 136, 43, 12, 167, 197, 227, 257, 75, 106, 137, 228, 44, 198, 168, 258, 288, 13, 229, 76, 107, 199, 138, 259, 169, 289, 45, 230, 260,
+ 200, 108, 14, 170, 139, 320, 290, 77, 231, 261, 46, 201, 140, 291, 109, 232, 321, 262, 171, 78, 292, 15, 322, 202, 263, 352, 172, 293, 233, 141, 323, 110,
+ 47, 203, 264, 234, 294, 353, 324, 16, 79, 204, 265, 295, 325, 173, 354, 142, 235, 384, 48, 296, 111, 266, 355, 326, 80, 17, 205, 236, 174, 356, 385, 327,
+ 143, 297, 267, 357, 386, 112, 49, 328, 298, 206, 416, 237, 358, 387, 81, 175, 18, 329, 359, 388, 299, 330, 389, 113, 417, 238, 360, 50, 207, 418, 390, 331,
+ 19, 448, 361, 82, 419, 391, 239, 51, 362, 420, 114, 449, 480, 421, 83, 363, 450, 422, 512, 451, 423, 115, 452, 481, 453, 482, 454, 544, 483, 455, 513, 484,
+ 514, 485, 515, 486, 545, 576, 487, 546, 547, 608, 577, 578, 579, 609, 610, 611, 20, 144, 268, 392, 516, 640, 21, 52, 145, 176, 269, 300, 393, 424, 517, 548,
+ 641, 672, 22, 53, 84, 146, 177, 208, 270, 301, 332, 394, 425, 456, 518, 549, 580, 642, 673, 704, 23, 54, 85, 116, 147, 178, 209, 240, 271, 302, 333, 364,
+ 395, 426, 457, 488, 519, 550, 581, 612, 643, 674, 705, 736, 55, 86, 117, 179, 210, 241, 303, 334, 365, 427, 458, 489, 551, 582, 613, 675, 706, 737, 87, 118,
+ 211, 242, 335, 366, 459, 490, 583, 614, 707, 738, 119, 243, 367, 491, 615, 739, 24, 148, 272, 396, 520, 644, 768, 25, 56, 149, 180, 273, 304, 397, 428, 521,
+ 552, 645, 676, 769, 800, 26, 57, 88, 150, 181, 212, 274, 305, 336, 398, 429, 460, 522, 553, 584, 646, 677, 708, 770, 801, 832, 27, 58, 89, 120, 151, 182,
+ 213, 244, 275, 306, 337, 368, 399, 430, 461, 492, 523, 554, 585, 616, 647, 678, 709, 740, 771, 802, 833, 864, 59, 90, 121, 183, 214, 245, 307, 338, 369, 431,
+ 462, 493, 555, 586, 617, 679, 710, 741, 803, 834, 865, 91, 122, 215, 246, 339, 370, 463, 494, 587, 618, 711, 742, 835, 866, 123, 247, 371, 495, 619, 743, 867,
+ 28, 152, 276, 400, 524, 648, 772, 896, 29, 60, 153, 184, 277, 308, 401, 432, 525, 556, 649, 680, 773, 804, 897, 928, 30, 61, 92, 154, 185, 216, 278, 309,
+ 340, 402, 433, 464, 526, 557, 588, 650, 681, 712, 774, 805, 836, 898, 929, 960, 31, 62, 93, 124, 155, 186, 217, 248, 279, 310, 341, 372, 403, 434, 465, 496,
+ 527, 558, 589, 620, 651, 682, 713, 744, 775, 806, 837, 868, 899, 930, 961, 992, 63, 94, 125, 187, 218, 249, 311, 342, 373, 435, 466, 497, 559, 590, 621, 683,
+ 714, 745, 807, 838, 869, 931, 962, 993, 95, 126, 219, 250, 343, 374, 467, 498, 591, 622, 715, 746, 839, 870, 963, 994, 127, 251, 375, 499, 623, 747, 871, 995,
+ 156, 280, 404, 528, 652, 776, 900, 157, 188, 281, 312, 405, 436, 529, 560, 653, 684, 777, 808, 901, 932, 158, 189, 220, 282, 313, 344, 406, 437, 468, 530, 561,
+ 592, 654, 685, 716, 778, 809, 840, 902, 933, 964, 159, 190, 221, 252, 283, 314, 345, 376, 407, 438, 469, 500, 531, 562, 593, 624, 655, 686, 717, 748, 779, 810,
+ 841, 872, 903, 934, 965, 996, 191, 222, 253, 315, 346, 377, 439, 470, 501, 563, 594, 625, 687, 718, 749, 811, 842, 873, 935, 966, 997, 223, 254, 347, 378, 471,
+ 502, 595, 626, 719, 750, 843, 874, 967, 998, 255, 379, 503, 627, 751, 875, 999, 284, 408, 532, 656, 780, 904, 285, 316, 409, 440, 533, 564, 657, 688, 781, 812,
+ 905, 936, 286, 317, 348, 410, 441, 472, 534, 565, 596, 658, 689, 720, 782, 813, 844, 906, 937, 968, 287, 318, 349, 380, 411, 442, 473, 504, 535, 566, 597, 628,
+ 659, 690, 721, 752, 783, 814, 845, 876, 907, 938, 969, 1000, 319, 350, 381, 443, 474, 505, 567, 598, 629, 691, 722, 753, 815, 846, 877, 939, 970, 1001, 351, 382,
+ 475, 506, 599, 630, 723, 754, 847, 878, 971, 1002, 383, 507, 631, 755, 879, 1003, 412, 536, 660, 784, 908, 413, 444, 537, 568, 661, 692, 785, 816, 909, 940, 414,
+ 445, 476, 538, 569, 600, 662, 693, 724, 786, 817, 848, 910, 941, 972, 415, 446, 477, 508, 539, 570, 601, 632, 663, 694, 725, 756, 787, 818, 849, 880, 911, 942,
+ 973, 1004, 447, 478, 509, 571, 602, 633, 695, 726, 757, 819, 850, 881, 943, 974, 1005, 479, 510, 603, 634, 727, 758, 851, 882, 975, 1006, 511, 635, 759, 883, 1007,
+ 540, 664, 788, 912, 541, 572, 665, 696, 789, 820, 913, 944, 542, 573, 604, 666, 697, 728, 790, 821, 852, 914, 945, 976, 543, 574, 605, 636, 667, 698, 729, 760,
+ 791, 822, 853, 884, 915, 946, 977, 1008, 575, 606, 637, 699, 730, 761, 823, 854, 885, 947, 978, 1009, 607, 638, 731, 762, 855, 886, 979, 1010, 639, 763, 887, 1011,
+ 668, 792, 916, 669, 700, 793, 824, 917, 948, 670, 701, 732, 794, 825, 856, 918, 949, 980, 671, 702, 733, 764, 795, 826, 857, 888, 919, 950, 981, 1012, 703, 734,
+ 765, 827, 858, 889, 951, 982, 1013, 735, 766, 859, 890, 983, 1014, 767, 891, 1015, 796, 920, 797, 828, 921, 952, 798, 829, 860, 922, 953, 984, 799, 830, 861, 892,
+ 923, 954, 985, 1016, 831, 862, 893, 955, 986, 1017, 863, 894, 987, 1018, 895, 1019, 924, 925, 956, 926, 957, 988, 927, 958, 989, 1020, 959, 990, 1021, 991, 1022, 1023,
+};
+
+/*
+ * Coefficient scan-order lookup, indexed [tx size][scan index]:
+ * rows are tx sizes 4x4, 8x8, 16x16, 32x32 plus a lossless entry
+ * (lossless always uses the 4x4 default scan).  Within each row the
+ * four entries are default/column/row/default; 32x32 only ever uses
+ * the default scan.
+ * NOTE(review): the scan index is presumably derived from the intra
+ * prediction mode -- confirm against the decoder that reads this table.
+ */
+static const int16_t * const vp9_scans[5][4] = {
+ {
+ vp9_default_scan_4x4, vp9_col_scan_4x4,
+ vp9_row_scan_4x4, vp9_default_scan_4x4
+ }, {
+ vp9_default_scan_8x8, vp9_col_scan_8x8,
+ vp9_row_scan_8x8, vp9_default_scan_8x8
+ }, {
+ vp9_default_scan_16x16, vp9_col_scan_16x16,
+ vp9_row_scan_16x16, vp9_default_scan_16x16
+ }, {
+ vp9_default_scan_32x32, vp9_default_scan_32x32,
+ vp9_default_scan_32x32, vp9_default_scan_32x32
+ }, { // lossless
+ vp9_default_scan_4x4, vp9_default_scan_4x4,
+ vp9_default_scan_4x4, vp9_default_scan_4x4
+ }
+};
+
+/*
+ * Neighbor pairs for the 4x4 default scan: entry i holds the raster
+ * positions of two already-coded coefficients associated with scan
+ * position i (first and last entries are { 0, 0 } placeholders).
+ * NOTE(review): presumably used for coefficient coding-context
+ * derivation -- confirm in the decoder.
+ */
+static const int16_t vp9_default_scan_4x4_nb[16][2] = {
+ { 0, 0 }, { 0, 0 }, { 4, 1 }, { 1, 1 },
+ { 4, 4 }, { 2, 2 }, { 5, 2 }, { 8, 8 },
+ { 8, 5 }, { 6, 3 }, { 9, 6 }, { 12, 9 },
+ { 10, 7 }, { 13, 10 }, { 14, 11 }, { 0, 0 },
+};
+
+/* Neighbor pairs for the 4x4 column scan (same layout as
+ * vp9_default_scan_4x4_nb; both neighbors coincide for this scan). */
+static const int16_t vp9_col_scan_4x4_nb[16][2] = {
+ { 0, 0 }, { 1, 1 }, { 0, 0 }, { 2, 2 },
+ { 4, 4 }, { 5, 5 }, { 4, 4 }, { 6, 6 },
+ { 8, 8 }, { 9, 9 }, { 8, 8 }, { 12, 12 },
+ { 10, 10 }, { 13, 13 }, { 14, 14 }, { 0, 0 },
+};
+
+/* Neighbor pairs for the 4x4 row scan (same layout as
+ * vp9_default_scan_4x4_nb; both neighbors coincide for this scan). */
+static const int16_t vp9_row_scan_4x4_nb[16][2] = {
+ { 0, 0 }, { 0, 0 }, { 4, 4 }, { 1, 1 },
+ { 8, 8 }, { 5, 5 }, { 1, 1 }, { 2, 2 },
+ { 9, 9 }, { 2, 2 }, { 6, 6 }, { 3, 3 },
+ { 10, 10 }, { 7, 7 }, { 11, 11 }, { 0, 0 },
+};
+
+/*
+ * Neighbor pairs for the 8x8 default scan: entry i gives the raster
+ * positions (row-major within the 8x8 block) of two already-coded
+ * coefficients for scan position i; first/last entries are { 0, 0 }
+ * placeholders.  Generated data -- do not hand-edit.
+ */
+static const int16_t vp9_default_scan_8x8_nb[64][2] = {
+ { 0, 0 }, { 0, 0 }, { 1, 1 }, { 8, 1 },
+ { 8, 8 }, { 9, 2 }, { 2, 2 }, { 16, 9 },
+ { 16, 16 }, { 17, 10 }, { 10, 3 }, { 3, 3 },
+ { 24, 17 }, { 24, 24 }, { 18, 11 }, { 11, 4 },
+ { 25, 18 }, { 4, 4 }, { 32, 25 }, { 19, 12 },
+ { 26, 19 }, { 32, 32 }, { 12, 5 }, { 33, 26 },
+ { 5, 5 }, { 40, 33 }, { 27, 20 }, { 20, 13 },
+ { 34, 27 }, { 41, 34 }, { 40, 40 }, { 13, 6 },
+ { 6, 6 }, { 35, 28 }, { 28, 21 }, { 42, 35 },
+ { 48, 48 }, { 48, 41 }, { 21, 14 }, { 14, 7 },
+ { 36, 29 }, { 49, 42 }, { 43, 36 }, { 56, 49 },
+ { 29, 22 }, { 22, 15 }, { 50, 43 }, { 44, 37 },
+ { 57, 50 }, { 37, 30 }, { 30, 23 }, { 51, 44 },
+ { 58, 51 }, { 38, 31 }, { 45, 38 }, { 52, 45 },
+ { 59, 52 }, { 46, 39 }, { 53, 46 }, { 60, 53 },
+ { 54, 47 }, { 61, 54 }, { 62, 55 }, { 0, 0 },
+};
+
+/* Neighbor pairs for the 8x8 column scan (layout as
+ * vp9_default_scan_8x8_nb; both neighbors coincide for this scan).
+ * Generated data -- do not hand-edit. */
+static const int16_t vp9_col_scan_8x8_nb[64][2] = {
+ { 0, 0 }, { 1, 1 }, { 0, 0 }, { 2, 2 },
+ { 8, 8 }, { 3, 3 }, { 9, 9 }, { 8, 8 },
+ { 4, 4 }, { 10, 10 }, { 16, 16 }, { 11, 11 },
+ { 17, 17 }, { 5, 5 }, { 16, 16 }, { 18, 18 },
+ { 12, 12 }, { 24, 24 }, { 6, 6 }, { 25, 25 },
+ { 19, 19 }, { 24, 24 }, { 13, 13 }, { 26, 26 },
+ { 20, 20 }, { 32, 32 }, { 27, 27 }, { 33, 33 },
+ { 14, 14 }, { 21, 21 }, { 34, 34 }, { 32, 32 },
+ { 28, 28 }, { 40, 40 }, { 35, 35 }, { 22, 22 },
+ { 29, 29 }, { 41, 41 }, { 36, 36 }, { 40, 40 },
+ { 42, 42 }, { 30, 30 }, { 43, 43 }, { 48, 48 },
+ { 37, 37 }, { 49, 49 }, { 48, 48 }, { 44, 44 },
+ { 38, 38 }, { 50, 50 }, { 56, 56 }, { 51, 51 },
+ { 45, 45 }, { 57, 57 }, { 52, 52 }, { 58, 58 },
+ { 46, 46 }, { 59, 59 }, { 53, 53 }, { 60, 60 },
+ { 54, 54 }, { 61, 61 }, { 62, 62 }, { 0, 0 },
+};
+
+/* Neighbor pairs for the 8x8 row scan (layout as
+ * vp9_default_scan_8x8_nb; both neighbors coincide for this scan).
+ * Generated data -- do not hand-edit. */
+static const int16_t vp9_row_scan_8x8_nb[64][2] = {
+ { 0, 0 }, { 8, 8 }, { 0, 0 }, { 1, 1 },
+ { 16, 16 }, { 1, 1 }, { 9, 9 }, { 24, 24 },
+ { 2, 2 }, { 17, 17 }, { 2, 2 }, { 32, 32 },
+ { 10, 10 }, { 3, 3 }, { 25, 25 }, { 18, 18 },
+ { 11, 11 }, { 3, 3 }, { 40, 40 }, { 33, 33 },
+ { 26, 26 }, { 4, 4 }, { 19, 19 }, { 48, 48 },
+ { 12, 12 }, { 4, 4 }, { 34, 34 }, { 27, 27 },
+ { 5, 5 }, { 41, 41 }, { 20, 20 }, { 5, 5 },
+ { 13, 13 }, { 35, 35 }, { 28, 28 }, { 6, 6 },
+ { 42, 42 }, { 21, 21 }, { 49, 49 }, { 6, 6 },
+ { 36, 36 }, { 14, 14 }, { 29, 29 }, { 43, 43 },
+ { 7, 7 }, { 50, 50 }, { 22, 22 }, { 15, 15 },
+ { 37, 37 }, { 44, 44 }, { 30, 30 }, { 51, 51 },
+ { 23, 23 }, { 38, 38 }, { 45, 45 }, { 31, 31 },
+ { 52, 52 }, { 39, 39 }, { 53, 53 }, { 46, 46 },
+ { 54, 54 }, { 47, 47 }, { 55, 55 }, { 0, 0 },
+};
+
+/*
+ * Neighbor pairs for the 16x16 default scan: entry i gives the raster
+ * positions of two already-coded coefficients for scan position i;
+ * first/last entries are { 0, 0 } placeholders.
+ * Generated data -- do not hand-edit.
+ */
+static const int16_t vp9_default_scan_16x16_nb[256][2] = {
+ { 0, 0 }, { 0, 0 }, { 1, 1 }, { 16, 1 },
+ { 16, 16 }, { 2, 2 }, { 17, 2 }, { 32, 17 },
+ { 32, 32 }, { 3, 3 }, { 33, 18 }, { 18, 3 },
+ { 48, 33 }, { 19, 4 }, { 4, 4 }, { 34, 19 },
+ { 48, 48 }, { 49, 34 }, { 35, 20 }, { 64, 49 },
+ { 20, 5 }, { 5, 5 }, { 50, 35 }, { 64, 64 },
+ { 65, 50 }, { 36, 21 }, { 21, 6 }, { 51, 36 },
+ { 6, 6 }, { 80, 65 }, { 66, 51 }, { 37, 22 },
+ { 81, 66 }, { 52, 37 }, { 22, 7 }, { 80, 80 },
+ { 67, 52 }, { 7, 7 }, { 82, 67 }, { 96, 81 },
+ { 53, 38 }, { 38, 23 }, { 68, 53 }, { 96, 96 },
+ { 23, 8 }, { 97, 82 }, { 83, 68 }, { 69, 54 },
+ { 54, 39 }, { 8, 8 }, { 39, 24 }, { 84, 69 },
+ { 98, 83 }, { 112, 97 }, { 112, 112 }, { 24, 9 },
+ { 113, 98 }, { 99, 84 }, { 70, 55 }, { 85, 70 },
+ { 55, 40 }, { 9, 9 }, { 40, 25 }, { 114, 99 },
+ { 100, 85 }, { 128, 113 }, { 115, 100 }, { 71, 56 },
+ { 86, 71 }, { 25, 10 }, { 129, 114 }, { 128, 128 },
+ { 101, 86 }, { 56, 41 }, { 10, 10 }, { 41, 26 },
+ { 116, 101 }, { 130, 115 }, { 144, 129 }, { 87, 72 },
+ { 102, 87 }, { 26, 11 }, { 72, 57 }, { 131, 116 },
+ { 117, 102 }, { 145, 130 }, { 57, 42 }, { 144, 144 },
+ { 11, 11 }, { 42, 27 }, { 132, 117 }, { 146, 131 },
+ { 103, 88 }, { 88, 73 }, { 118, 103 }, { 160, 145 },
+ { 73, 58 }, { 147, 132 }, { 133, 118 }, { 27, 12 },
+ { 161, 146 }, { 58, 43 }, { 12, 12 }, { 160, 160 },
+ { 119, 104 }, { 148, 133 }, { 89, 74 }, { 134, 119 },
+ { 104, 89 }, { 162, 147 }, { 43, 28 }, { 74, 59 },
+ { 176, 161 }, { 163, 148 }, { 28, 13 }, { 149, 134 },
+ { 120, 105 }, { 135, 120 }, { 177, 162 }, { 164, 149 },
+ { 13, 13 }, { 105, 90 }, { 59, 44 }, { 90, 75 },
+ { 150, 135 }, { 44, 29 }, { 178, 163 }, { 176, 176 },
+ { 136, 121 }, { 165, 150 }, { 121, 106 }, { 75, 60 },
+ { 179, 164 }, { 151, 136 }, { 29, 14 }, { 60, 45 },
+ { 14, 14 }, { 106, 91 }, { 166, 151 }, { 180, 165 },
+ { 192, 177 }, { 91, 76 }, { 192, 192 }, { 45, 30 },
+ { 137, 122 }, { 122, 107 }, { 152, 137 }, { 193, 178 },
+ { 76, 61 }, { 167, 152 }, { 181, 166 }, { 30, 15 },
+ { 194, 179 }, { 208, 193 }, { 182, 167 }, { 107, 92 },
+ { 138, 123 }, { 61, 46 }, { 153, 138 }, { 46, 31 },
+ { 195, 180 }, { 92, 77 }, { 168, 153 }, { 209, 194 },
+ { 196, 181 }, { 208, 208 }, { 123, 108 }, { 183, 168 },
+ { 210, 195 }, { 77, 62 }, { 108, 93 }, { 169, 154 },
+ { 154, 139 }, { 62, 47 }, { 197, 182 }, { 211, 196 },
+ { 184, 169 }, { 224, 209 }, { 224, 224 }, { 139, 124 },
+ { 93, 78 }, { 198, 183 }, { 124, 109 }, { 78, 63 },
+ { 212, 197 }, { 225, 210 }, { 170, 155 }, { 185, 170 },
+ { 155, 140 }, { 213, 198 }, { 199, 184 }, { 109, 94 },
+ { 226, 211 }, { 140, 125 }, { 94, 79 }, { 240, 225 },
+ { 214, 199 }, { 227, 212 }, { 200, 185 }, { 125, 110 },
+ { 241, 226 }, { 186, 171 }, { 171, 156 }, { 156, 141 },
+ { 228, 213 }, { 110, 95 }, { 215, 200 }, { 242, 227 },
+ { 141, 126 }, { 201, 186 }, { 229, 214 }, { 126, 111 },
+ { 216, 201 }, { 243, 228 }, { 172, 157 }, { 187, 172 },
+ { 230, 215 }, { 157, 142 }, { 202, 187 }, { 142, 127 },
+ { 244, 229 }, { 217, 202 }, { 231, 216 }, { 188, 173 },
+ { 245, 230 }, { 158, 143 }, { 173, 158 }, { 232, 217 },
+ { 246, 231 }, { 218, 203 }, { 203, 188 }, { 174, 159 },
+ { 189, 174 }, { 247, 232 }, { 233, 218 }, { 204, 189 },
+ { 219, 204 }, { 248, 233 }, { 190, 175 }, { 234, 219 },
+ { 220, 205 }, { 249, 234 }, { 205, 190 }, { 221, 206 },
+ { 250, 235 }, { 235, 220 }, { 206, 191 }, { 236, 221 },
+ { 222, 207 }, { 251, 236 }, { 237, 222 }, { 252, 237 },
+ { 238, 223 }, { 253, 238 }, { 254, 239 }, { 0, 0 },
+};
+
+/* Neighbor pairs for the 16x16 column scan (layout as
+ * vp9_default_scan_16x16_nb; both neighbors coincide for this scan).
+ * Generated data -- do not hand-edit. */
+static const int16_t vp9_col_scan_16x16_nb[256][2] = {
+ { 0, 0 }, { 1, 1 }, { 2, 2 }, { 0, 0 },
+ { 3, 3 }, { 16, 16 }, { 4, 4 }, { 17, 17 },
+ { 5, 5 }, { 18, 18 }, { 16, 16 }, { 19, 19 },
+ { 6, 6 }, { 32, 32 }, { 20, 20 }, { 33, 33 },
+ { 7, 7 }, { 34, 34 }, { 21, 21 }, { 32, 32 },
+ { 35, 35 }, { 8, 8 }, { 48, 48 }, { 22, 22 },
+ { 49, 49 }, { 36, 36 }, { 9, 9 }, { 37, 37 },
+ { 50, 50 }, { 23, 23 }, { 48, 48 }, { 51, 51 },
+ { 10, 10 }, { 64, 64 }, { 38, 38 }, { 24, 24 },
+ { 52, 52 }, { 65, 65 }, { 53, 53 }, { 39, 39 },
+ { 66, 66 }, { 11, 11 }, { 64, 64 }, { 25, 25 },
+ { 67, 67 }, { 54, 54 }, { 80, 80 }, { 40, 40 },
+ { 68, 68 }, { 12, 12 }, { 26, 26 }, { 81, 81 },
+ { 55, 55 }, { 69, 69 }, { 82, 82 }, { 41, 41 },
+ { 13, 13 }, { 83, 83 }, { 80, 80 }, { 70, 70 },
+ { 27, 27 }, { 56, 56 }, { 84, 84 }, { 96, 96 },
+ { 14, 14 }, { 71, 71 }, { 97, 97 }, { 42, 42 },
+ { 85, 85 }, { 57, 57 }, { 98, 98 }, { 28, 28 },
+ { 86, 86 }, { 99, 99 }, { 96, 96 }, { 72, 72 },
+ { 43, 43 }, { 100, 100 }, { 58, 58 }, { 29, 29 },
+ { 112, 112 }, { 87, 87 }, { 113, 113 }, { 73, 73 },
+ { 112, 112 }, { 101, 101 }, { 44, 44 }, { 30, 30 },
+ { 114, 114 }, { 59, 59 }, { 102, 102 }, { 88, 88 },
+ { 115, 115 }, { 74, 74 }, { 128, 128 }, { 116, 116 },
+ { 45, 45 }, { 103, 103 }, { 89, 89 }, { 60, 60 },
+ { 129, 129 }, { 117, 117 }, { 130, 130 }, { 131, 131 },
+ { 104, 104 }, { 75, 75 }, { 46, 46 }, { 118, 118 },
+ { 128, 128 }, { 90, 90 }, { 61, 61 }, { 132, 132 },
+ { 105, 105 }, { 144, 144 }, { 119, 119 }, { 145, 145 },
+ { 133, 133 }, { 76, 76 }, { 146, 146 }, { 120, 120 },
+ { 91, 91 }, { 134, 134 }, { 147, 147 }, { 62, 62 },
+ { 106, 106 }, { 135, 135 }, { 121, 121 }, { 92, 92 },
+ { 148, 148 }, { 144, 144 }, { 77, 77 }, { 149, 149 },
+ { 136, 136 }, { 107, 107 }, { 160, 160 }, { 161, 161 },
+ { 150, 150 }, { 122, 122 }, { 78, 78 }, { 137, 137 },
+ { 162, 162 }, { 151, 151 }, { 93, 93 }, { 163, 163 },
+ { 108, 108 }, { 164, 164 }, { 152, 152 }, { 123, 123 },
+ { 138, 138 }, { 160, 160 }, { 165, 165 }, { 94, 94 },
+ { 176, 176 }, { 166, 166 }, { 109, 109 }, { 153, 153 },
+ { 177, 177 }, { 124, 124 }, { 178, 178 }, { 139, 139 },
+ { 167, 167 }, { 154, 154 }, { 110, 110 }, { 179, 179 },
+ { 176, 176 }, { 180, 180 }, { 168, 168 }, { 140, 140 },
+ { 125, 125 }, { 181, 181 }, { 192, 192 }, { 193, 193 },
+ { 155, 155 }, { 182, 182 }, { 169, 169 }, { 194, 194 },
+ { 126, 126 }, { 141, 141 }, { 195, 195 }, { 183, 183 },
+ { 192, 192 }, { 196, 196 }, { 156, 156 }, { 170, 170 },
+ { 142, 142 }, { 184, 184 }, { 197, 197 }, { 208, 208 },
+ { 198, 198 }, { 209, 209 }, { 171, 171 }, { 157, 157 },
+ { 185, 185 }, { 210, 210 }, { 208, 208 }, { 211, 211 },
+ { 199, 199 }, { 224, 224 }, { 158, 158 }, { 212, 212 },
+ { 224, 224 }, { 186, 186 }, { 200, 200 }, { 172, 172 },
+ { 225, 225 }, { 213, 213 }, { 214, 214 }, { 226, 226 },
+ { 201, 201 }, { 227, 227 }, { 187, 187 }, { 240, 240 },
+ { 215, 215 }, { 173, 173 }, { 228, 228 }, { 241, 241 },
+ { 202, 202 }, { 242, 242 }, { 216, 216 }, { 229, 229 },
+ { 174, 174 }, { 188, 188 }, { 243, 243 }, { 230, 230 },
+ { 203, 203 }, { 217, 217 }, { 231, 231 }, { 244, 244 },
+ { 218, 218 }, { 245, 245 }, { 189, 189 }, { 232, 232 },
+ { 204, 204 }, { 190, 190 }, { 246, 246 }, { 233, 233 },
+ { 247, 247 }, { 219, 219 }, { 205, 205 }, { 248, 248 },
+ { 234, 234 }, { 220, 220 }, { 206, 206 }, { 249, 249 },
+ { 235, 235 }, { 221, 221 }, { 250, 250 }, { 222, 222 },
+ { 236, 236 }, { 237, 237 }, { 251, 251 }, { 238, 238 },
+ { 252, 252 }, { 253, 253 }, { 254, 254 }, { 0, 0 },
+};
+
+/* Neighbor pairs for the 16x16 row scan (layout as
+ * vp9_default_scan_16x16_nb; both neighbors coincide for this scan).
+ * Generated data -- do not hand-edit. */
+static const int16_t vp9_row_scan_16x16_nb[256][2] = {
+ { 0, 0 }, { 16, 16 }, { 0, 0 }, { 32, 32 },
+ { 1, 1 }, { 48, 48 }, { 17, 17 }, { 1, 1 },
+ { 64, 64 }, { 2, 2 }, { 33, 33 }, { 80, 80 },
+ { 18, 18 }, { 2, 2 }, { 49, 49 }, { 3, 3 },
+ { 96, 96 }, { 34, 34 }, { 65, 65 }, { 19, 19 },
+ { 3, 3 }, { 112, 112 }, { 50, 50 }, { 4, 4 },
+ { 81, 81 }, { 35, 35 }, { 66, 66 }, { 4, 4 },
+ { 128, 128 }, { 20, 20 }, { 51, 51 }, { 97, 97 },
+ { 82, 82 }, { 5, 5 }, { 36, 36 }, { 144, 144 },
+ { 67, 67 }, { 113, 113 }, { 21, 21 }, { 52, 52 },
+ { 5, 5 }, { 98, 98 }, { 160, 160 }, { 83, 83 },
+ { 37, 37 }, { 6, 6 }, { 68, 68 }, { 129, 129 },
+ { 22, 22 }, { 53, 53 }, { 114, 114 }, { 6, 6 },
+ { 99, 99 }, { 176, 176 }, { 84, 84 }, { 38, 38 },
+ { 7, 7 }, { 69, 69 }, { 145, 145 }, { 130, 130 },
+ { 115, 115 }, { 23, 23 }, { 54, 54 }, { 192, 192 },
+ { 100, 100 }, { 7, 7 }, { 85, 85 }, { 161, 161 },
+ { 39, 39 }, { 70, 70 }, { 8, 8 }, { 146, 146 },
+ { 131, 131 }, { 116, 116 }, { 55, 55 }, { 208, 208 },
+ { 101, 101 }, { 24, 24 }, { 86, 86 }, { 8, 8 },
+ { 132, 132 }, { 40, 40 }, { 71, 71 }, { 177, 177 },
+ { 147, 147 }, { 224, 224 }, { 117, 117 }, { 162, 162 },
+ { 9, 9 }, { 102, 102 }, { 56, 56 }, { 25, 25 },
+ { 87, 87 }, { 148, 148 }, { 9, 9 }, { 133, 133 },
+ { 72, 72 }, { 118, 118 }, { 193, 193 }, { 163, 163 },
+ { 41, 41 }, { 103, 103 }, { 178, 178 }, { 10, 10 },
+ { 57, 57 }, { 149, 149 }, { 134, 134 }, { 88, 88 },
+ { 26, 26 }, { 119, 119 }, { 10, 10 }, { 164, 164 },
+ { 104, 104 }, { 73, 73 }, { 209, 209 }, { 179, 179 },
+ { 42, 42 }, { 11, 11 }, { 194, 194 }, { 135, 135 },
+ { 165, 165 }, { 150, 150 }, { 58, 58 }, { 27, 27 },
+ { 89, 89 }, { 11, 11 }, { 120, 120 }, { 74, 74 },
+ { 43, 43 }, { 225, 225 }, { 105, 105 }, { 12, 12 },
+ { 180, 180 }, { 151, 151 }, { 195, 195 }, { 136, 136 },
+ { 28, 28 }, { 166, 166 }, { 121, 121 }, { 59, 59 },
+ { 12, 12 }, { 210, 210 }, { 90, 90 }, { 106, 106 },
+ { 44, 44 }, { 181, 181 }, { 75, 75 }, { 152, 152 },
+ { 13, 13 }, { 167, 167 }, { 137, 137 }, { 13, 13 },
+ { 60, 60 }, { 196, 196 }, { 122, 122 }, { 29, 29 },
+ { 91, 91 }, { 14, 14 }, { 182, 182 }, { 76, 76 },
+ { 211, 211 }, { 153, 153 }, { 14, 14 }, { 107, 107 },
+ { 138, 138 }, { 45, 45 }, { 226, 226 }, { 168, 168 },
+ { 197, 197 }, { 123, 123 }, { 30, 30 }, { 61, 61 },
+ { 15, 15 }, { 92, 92 }, { 154, 154 }, { 183, 183 },
+ { 169, 169 }, { 108, 108 }, { 212, 212 }, { 77, 77 },
+ { 139, 139 }, { 198, 198 }, { 46, 46 }, { 124, 124 },
+ { 227, 227 }, { 62, 62 }, { 31, 31 }, { 184, 184 },
+ { 93, 93 }, { 170, 170 }, { 155, 155 }, { 185, 185 },
+ { 78, 78 }, { 47, 47 }, { 199, 199 }, { 213, 213 },
+ { 140, 140 }, { 63, 63 }, { 109, 109 }, { 125, 125 },
+ { 94, 94 }, { 200, 200 }, { 171, 171 }, { 156, 156 },
+ { 228, 228 }, { 186, 186 }, { 214, 214 }, { 201, 201 },
+ { 79, 79 }, { 141, 141 }, { 110, 110 }, { 229, 229 },
+ { 95, 95 }, { 126, 126 }, { 215, 215 }, { 172, 172 },
+ { 111, 111 }, { 142, 142 }, { 202, 202 }, { 157, 157 },
+ { 216, 216 }, { 230, 230 }, { 217, 217 }, { 187, 187 },
+ { 127, 127 }, { 231, 231 }, { 158, 158 }, { 173, 173 },
+ { 143, 143 }, { 203, 203 }, { 188, 188 }, { 232, 232 },
+ { 218, 218 }, { 233, 233 }, { 159, 159 }, { 174, 174 },
+ { 204, 204 }, { 189, 189 }, { 234, 234 }, { 219, 219 },
+ { 175, 175 }, { 205, 205 }, { 235, 235 }, { 220, 220 },
+ { 190, 190 }, { 236, 236 }, { 206, 206 }, { 191, 191 },
+ { 221, 221 }, { 207, 207 }, { 237, 237 }, { 222, 222 },
+ { 238, 238 }, { 223, 223 }, { 239, 239 }, { 0, 0 },
+};
+
+/*
+ * Neighbor pairs for the 32x32 default scan: entry i gives the raster
+ * positions of two already-coded coefficients for scan position i;
+ * first/last entries are { 0, 0 } placeholders.  The 32x32 tx size has
+ * no column/row variants (see vp9_scans_nb below).
+ * Generated data -- do not hand-edit.
+ */
+static const int16_t vp9_default_scan_32x32_nb[1024][2] = {
+ { 0, 0 }, { 0, 0 }, { 1, 1 }, { 32, 1 },
+ { 32, 32 }, { 2, 2 }, { 33, 2 }, { 64, 33 },
+ { 3, 3 }, { 64, 64 }, { 34, 3 }, { 65, 34 },
+ { 4, 4 }, { 35, 4 }, { 96, 65 }, { 66, 35 },
+ { 96, 96 }, { 97, 66 }, { 67, 36 }, { 36, 5 },
+ { 5, 5 }, { 128, 97 }, { 98, 67 }, { 6, 6 },
+ { 128, 128 }, { 68, 37 }, { 37, 6 }, { 129, 98 },
+ { 99, 68 }, { 160, 129 }, { 130, 99 }, { 38, 7 },
+ { 69, 38 }, { 7, 7 }, { 100, 69 }, { 161, 130 },
+ { 131, 100 }, { 160, 160 }, { 70, 39 }, { 39, 8 },
+ { 8, 8 }, { 101, 70 }, { 162, 131 }, { 132, 101 },
+ { 192, 161 }, { 71, 40 }, { 192, 192 }, { 102, 71 },
+ { 40, 9 }, { 163, 132 }, { 9, 9 }, { 193, 162 },
+ { 133, 102 }, { 164, 133 }, { 72, 41 }, { 103, 72 },
+ { 134, 103 }, { 224, 193 }, { 41, 10 }, { 194, 163 },
+ { 10, 10 }, { 224, 224 }, { 165, 134 }, { 225, 194 },
+ { 195, 164 }, { 73, 42 }, { 104, 73 }, { 135, 104 },
+ { 42, 11 }, { 11, 11 }, { 166, 135 }, { 196, 165 },
+ { 226, 195 }, { 256, 225 }, { 74, 43 }, { 105, 74 },
+ { 136, 105 }, { 227, 196 }, { 43, 12 }, { 197, 166 },
+ { 167, 136 }, { 257, 226 }, { 256, 256 }, { 12, 12 },
+ { 228, 197 }, { 75, 44 }, { 106, 75 }, { 198, 167 },
+ { 137, 106 }, { 258, 227 }, { 168, 137 }, { 288, 257 },
+ { 44, 13 }, { 229, 198 }, { 259, 228 }, { 199, 168 },
+ { 107, 76 }, { 13, 13 }, { 169, 138 }, { 138, 107 },
+ { 288, 288 }, { 289, 258 }, { 76, 45 }, { 230, 199 },
+ { 260, 229 }, { 45, 14 }, { 200, 169 }, { 139, 108 },
+ { 290, 259 }, { 108, 77 }, { 231, 200 }, { 320, 289 },
+ { 261, 230 }, { 170, 139 }, { 77, 46 }, { 291, 260 },
+ { 14, 14 }, { 321, 290 }, { 201, 170 }, { 262, 231 },
+ { 320, 320 }, { 171, 140 }, { 292, 261 }, { 232, 201 },
+ { 140, 109 }, { 322, 291 }, { 109, 78 }, { 46, 15 },
+ { 202, 171 }, { 263, 232 }, { 233, 202 }, { 293, 262 },
+ { 352, 321 }, { 323, 292 }, { 15, 15 }, { 78, 47 },
+ { 203, 172 }, { 264, 233 }, { 294, 263 }, { 324, 293 },
+ { 172, 141 }, { 353, 322 }, { 141, 110 }, { 234, 203 },
+ { 352, 352 }, { 47, 16 }, { 295, 264 }, { 110, 79 },
+ { 265, 234 }, { 354, 323 }, { 325, 294 }, { 79, 48 },
+ { 16, 16 }, { 204, 173 }, { 235, 204 }, { 173, 142 },
+ { 355, 324 }, { 384, 353 }, { 326, 295 }, { 142, 111 },
+ { 296, 265 }, { 266, 235 }, { 356, 325 }, { 385, 354 },
+ { 111, 80 }, { 48, 17 }, { 327, 296 }, { 297, 266 },
+ { 205, 174 }, { 384, 384 }, { 236, 205 }, { 357, 326 },
+ { 386, 355 }, { 80, 49 }, { 174, 143 }, { 17, 17 },
+ { 328, 297 }, { 358, 327 }, { 387, 356 }, { 298, 267 },
+ { 329, 298 }, { 388, 357 }, { 112, 81 }, { 416, 385 },
+ { 237, 206 }, { 359, 328 }, { 49, 18 }, { 206, 175 },
+ { 417, 386 }, { 389, 358 }, { 330, 299 }, { 18, 18 },
+ { 416, 416 }, { 360, 329 }, { 81, 50 }, { 418, 387 },
+ { 390, 359 }, { 238, 207 }, { 50, 19 }, { 361, 330 },
+ { 419, 388 }, { 113, 82 }, { 448, 417 }, { 448, 448 },
+ { 420, 389 }, { 82, 51 }, { 362, 331 }, { 449, 418 },
+ { 421, 390 }, { 480, 480 }, { 450, 419 }, { 422, 391 },
+ { 114, 83 }, { 451, 420 }, { 480, 449 }, { 452, 421 },
+ { 481, 450 }, { 453, 422 }, { 512, 512 }, { 482, 451 },
+ { 454, 423 }, { 512, 481 }, { 483, 452 }, { 513, 482 },
+ { 484, 453 }, { 514, 483 }, { 485, 454 }, { 544, 513 },
+ { 544, 544 }, { 486, 455 }, { 545, 514 }, { 546, 515 },
+ { 576, 576 }, { 576, 545 }, { 577, 546 }, { 578, 547 },
+ { 608, 577 }, { 609, 578 }, { 610, 579 }, { 19, 19 },
+ { 143, 112 }, { 267, 236 }, { 391, 360 }, { 515, 484 },
+ { 608, 608 }, { 20, 20 }, { 51, 20 }, { 144, 113 },
+ { 175, 144 }, { 268, 237 }, { 299, 268 }, { 392, 361 },
+ { 423, 392 }, { 516, 485 }, { 547, 516 }, { 640, 609 },
+ { 640, 640 }, { 21, 21 }, { 52, 21 }, { 83, 52 },
+ { 145, 114 }, { 176, 145 }, { 207, 176 }, { 269, 238 },
+ { 300, 269 }, { 331, 300 }, { 393, 362 }, { 424, 393 },
+ { 455, 424 }, { 517, 486 }, { 548, 517 }, { 579, 548 },
+ { 641, 610 }, { 672, 641 }, { 672, 672 }, { 22, 22 },
+ { 53, 22 }, { 84, 53 }, { 115, 84 }, { 146, 115 },
+ { 177, 146 }, { 208, 177 }, { 239, 208 }, { 270, 239 },
+ { 301, 270 }, { 332, 301 }, { 363, 332 }, { 394, 363 },
+ { 425, 394 }, { 456, 425 }, { 487, 456 }, { 518, 487 },
+ { 549, 518 }, { 580, 549 }, { 611, 580 }, { 642, 611 },
+ { 673, 642 }, { 704, 673 }, { 704, 704 }, { 54, 23 },
+ { 85, 54 }, { 116, 85 }, { 178, 147 }, { 209, 178 },
+ { 240, 209 }, { 302, 271 }, { 333, 302 }, { 364, 333 },
+ { 426, 395 }, { 457, 426 }, { 488, 457 }, { 550, 519 },
+ { 581, 550 }, { 612, 581 }, { 674, 643 }, { 705, 674 },
+ { 736, 705 }, { 86, 55 }, { 117, 86 }, { 210, 179 },
+ { 241, 210 }, { 334, 303 }, { 365, 334 }, { 458, 427 },
+ { 489, 458 }, { 582, 551 }, { 613, 582 }, { 706, 675 },
+ { 737, 706 }, { 118, 87 }, { 242, 211 }, { 366, 335 },
+ { 490, 459 }, { 614, 583 }, { 738, 707 }, { 23, 23 },
+ { 147, 116 }, { 271, 240 }, { 395, 364 }, { 519, 488 },
+ { 643, 612 }, { 736, 736 }, { 24, 24 }, { 55, 24 },
+ { 148, 117 }, { 179, 148 }, { 272, 241 }, { 303, 272 },
+ { 396, 365 }, { 427, 396 }, { 520, 489 }, { 551, 520 },
+ { 644, 613 }, { 675, 644 }, { 768, 737 }, { 768, 768 },
+ { 25, 25 }, { 56, 25 }, { 87, 56 }, { 149, 118 },
+ { 180, 149 }, { 211, 180 }, { 273, 242 }, { 304, 273 },
+ { 335, 304 }, { 397, 366 }, { 428, 397 }, { 459, 428 },
+ { 521, 490 }, { 552, 521 }, { 583, 552 }, { 645, 614 },
+ { 676, 645 }, { 707, 676 }, { 769, 738 }, { 800, 769 },
+ { 800, 800 }, { 26, 26 }, { 57, 26 }, { 88, 57 },
+ { 119, 88 }, { 150, 119 }, { 181, 150 }, { 212, 181 },
+ { 243, 212 }, { 274, 243 }, { 305, 274 }, { 336, 305 },
+ { 367, 336 }, { 398, 367 }, { 429, 398 }, { 460, 429 },
+ { 491, 460 }, { 522, 491 }, { 553, 522 }, { 584, 553 },
+ { 615, 584 }, { 646, 615 }, { 677, 646 }, { 708, 677 },
+ { 739, 708 }, { 770, 739 }, { 801, 770 }, { 832, 801 },
+ { 832, 832 }, { 58, 27 }, { 89, 58 }, { 120, 89 },
+ { 182, 151 }, { 213, 182 }, { 244, 213 }, { 306, 275 },
+ { 337, 306 }, { 368, 337 }, { 430, 399 }, { 461, 430 },
+ { 492, 461 }, { 554, 523 }, { 585, 554 }, { 616, 585 },
+ { 678, 647 }, { 709, 678 }, { 740, 709 }, { 802, 771 },
+ { 833, 802 }, { 864, 833 }, { 90, 59 }, { 121, 90 },
+ { 214, 183 }, { 245, 214 }, { 338, 307 }, { 369, 338 },
+ { 462, 431 }, { 493, 462 }, { 586, 555 }, { 617, 586 },
+ { 710, 679 }, { 741, 710 }, { 834, 803 }, { 865, 834 },
+ { 122, 91 }, { 246, 215 }, { 370, 339 }, { 494, 463 },
+ { 618, 587 }, { 742, 711 }, { 866, 835 }, { 27, 27 },
+ { 151, 120 }, { 275, 244 }, { 399, 368 }, { 523, 492 },
+ { 647, 616 }, { 771, 740 }, { 864, 864 }, { 28, 28 },
+ { 59, 28 }, { 152, 121 }, { 183, 152 }, { 276, 245 },
+ { 307, 276 }, { 400, 369 }, { 431, 400 }, { 524, 493 },
+ { 555, 524 }, { 648, 617 }, { 679, 648 }, { 772, 741 },
+ { 803, 772 }, { 896, 865 }, { 896, 896 }, { 29, 29 },
+ { 60, 29 }, { 91, 60 }, { 153, 122 }, { 184, 153 },
+ { 215, 184 }, { 277, 246 }, { 308, 277 }, { 339, 308 },
+ { 401, 370 }, { 432, 401 }, { 463, 432 }, { 525, 494 },
+ { 556, 525 }, { 587, 556 }, { 649, 618 }, { 680, 649 },
+ { 711, 680 }, { 773, 742 }, { 804, 773 }, { 835, 804 },
+ { 897, 866 }, { 928, 897 }, { 928, 928 }, { 30, 30 },
+ { 61, 30 }, { 92, 61 }, { 123, 92 }, { 154, 123 },
+ { 185, 154 }, { 216, 185 }, { 247, 216 }, { 278, 247 },
+ { 309, 278 }, { 340, 309 }, { 371, 340 }, { 402, 371 },
+ { 433, 402 }, { 464, 433 }, { 495, 464 }, { 526, 495 },
+ { 557, 526 }, { 588, 557 }, { 619, 588 }, { 650, 619 },
+ { 681, 650 }, { 712, 681 }, { 743, 712 }, { 774, 743 },
+ { 805, 774 }, { 836, 805 }, { 867, 836 }, { 898, 867 },
+ { 929, 898 }, { 960, 929 }, { 960, 960 }, { 62, 31 },
+ { 93, 62 }, { 124, 93 }, { 186, 155 }, { 217, 186 },
+ { 248, 217 }, { 310, 279 }, { 341, 310 }, { 372, 341 },
+ { 434, 403 }, { 465, 434 }, { 496, 465 }, { 558, 527 },
+ { 589, 558 }, { 620, 589 }, { 682, 651 }, { 713, 682 },
+ { 744, 713 }, { 806, 775 }, { 837, 806 }, { 868, 837 },
+ { 930, 899 }, { 961, 930 }, { 992, 961 }, { 94, 63 },
+ { 125, 94 }, { 218, 187 }, { 249, 218 }, { 342, 311 },
+ { 373, 342 }, { 466, 435 }, { 497, 466 }, { 590, 559 },
+ { 621, 590 }, { 714, 683 }, { 745, 714 }, { 838, 807 },
+ { 869, 838 }, { 962, 931 }, { 993, 962 }, { 126, 95 },
+ { 250, 219 }, { 374, 343 }, { 498, 467 }, { 622, 591 },
+ { 746, 715 }, { 870, 839 }, { 994, 963 }, { 155, 124 },
+ { 279, 248 }, { 403, 372 }, { 527, 496 }, { 651, 620 },
+ { 775, 744 }, { 899, 868 }, { 156, 125 }, { 187, 156 },
+ { 280, 249 }, { 311, 280 }, { 404, 373 }, { 435, 404 },
+ { 528, 497 }, { 559, 528 }, { 652, 621 }, { 683, 652 },
+ { 776, 745 }, { 807, 776 }, { 900, 869 }, { 931, 900 },
+ { 157, 126 }, { 188, 157 }, { 219, 188 }, { 281, 250 },
+ { 312, 281 }, { 343, 312 }, { 405, 374 }, { 436, 405 },
+ { 467, 436 }, { 529, 498 }, { 560, 529 }, { 591, 560 },
+ { 653, 622 }, { 684, 653 }, { 715, 684 }, { 777, 746 },
+ { 808, 777 }, { 839, 808 }, { 901, 870 }, { 932, 901 },
+ { 963, 932 }, { 158, 127 }, { 189, 158 }, { 220, 189 },
+ { 251, 220 }, { 282, 251 }, { 313, 282 }, { 344, 313 },
+ { 375, 344 }, { 406, 375 }, { 437, 406 }, { 468, 437 },
+ { 499, 468 }, { 530, 499 }, { 561, 530 }, { 592, 561 },
+ { 623, 592 }, { 654, 623 }, { 685, 654 }, { 716, 685 },
+ { 747, 716 }, { 778, 747 }, { 809, 778 }, { 840, 809 },
+ { 871, 840 }, { 902, 871 }, { 933, 902 }, { 964, 933 },
+ { 995, 964 }, { 190, 159 }, { 221, 190 }, { 252, 221 },
+ { 314, 283 }, { 345, 314 }, { 376, 345 }, { 438, 407 },
+ { 469, 438 }, { 500, 469 }, { 562, 531 }, { 593, 562 },
+ { 624, 593 }, { 686, 655 }, { 717, 686 }, { 748, 717 },
+ { 810, 779 }, { 841, 810 }, { 872, 841 }, { 934, 903 },
+ { 965, 934 }, { 996, 965 }, { 222, 191 }, { 253, 222 },
+ { 346, 315 }, { 377, 346 }, { 470, 439 }, { 501, 470 },
+ { 594, 563 }, { 625, 594 }, { 718, 687 }, { 749, 718 },
+ { 842, 811 }, { 873, 842 }, { 966, 935 }, { 997, 966 },
+ { 254, 223 }, { 378, 347 }, { 502, 471 }, { 626, 595 },
+ { 750, 719 }, { 874, 843 }, { 998, 967 }, { 283, 252 },
+ { 407, 376 }, { 531, 500 }, { 655, 624 }, { 779, 748 },
+ { 903, 872 }, { 284, 253 }, { 315, 284 }, { 408, 377 },
+ { 439, 408 }, { 532, 501 }, { 563, 532 }, { 656, 625 },
+ { 687, 656 }, { 780, 749 }, { 811, 780 }, { 904, 873 },
+ { 935, 904 }, { 285, 254 }, { 316, 285 }, { 347, 316 },
+ { 409, 378 }, { 440, 409 }, { 471, 440 }, { 533, 502 },
+ { 564, 533 }, { 595, 564 }, { 657, 626 }, { 688, 657 },
+ { 719, 688 }, { 781, 750 }, { 812, 781 }, { 843, 812 },
+ { 905, 874 }, { 936, 905 }, { 967, 936 }, { 286, 255 },
+ { 317, 286 }, { 348, 317 }, { 379, 348 }, { 410, 379 },
+ { 441, 410 }, { 472, 441 }, { 503, 472 }, { 534, 503 },
+ { 565, 534 }, { 596, 565 }, { 627, 596 }, { 658, 627 },
+ { 689, 658 }, { 720, 689 }, { 751, 720 }, { 782, 751 },
+ { 813, 782 }, { 844, 813 }, { 875, 844 }, { 906, 875 },
+ { 937, 906 }, { 968, 937 }, { 999, 968 }, { 318, 287 },
+ { 349, 318 }, { 380, 349 }, { 442, 411 }, { 473, 442 },
+ { 504, 473 }, { 566, 535 }, { 597, 566 }, { 628, 597 },
+ { 690, 659 }, { 721, 690 }, { 752, 721 }, { 814, 783 },
+ { 845, 814 }, { 876, 845 }, { 938, 907 }, { 969, 938 },
+ { 1000, 969 }, { 350, 319 }, { 381, 350 }, { 474, 443 },
+ { 505, 474 }, { 598, 567 }, { 629, 598 }, { 722, 691 },
+ { 753, 722 }, { 846, 815 }, { 877, 846 }, { 970, 939 },
+ { 1001, 970 }, { 382, 351 }, { 506, 475 }, { 630, 599 },
+ { 754, 723 }, { 878, 847 }, { 1002, 971 }, { 411, 380 },
+ { 535, 504 }, { 659, 628 }, { 783, 752 }, { 907, 876 },
+ { 412, 381 }, { 443, 412 }, { 536, 505 }, { 567, 536 },
+ { 660, 629 }, { 691, 660 }, { 784, 753 }, { 815, 784 },
+ { 908, 877 }, { 939, 908 }, { 413, 382 }, { 444, 413 },
+ { 475, 444 }, { 537, 506 }, { 568, 537 }, { 599, 568 },
+ { 661, 630 }, { 692, 661 }, { 723, 692 }, { 785, 754 },
+ { 816, 785 }, { 847, 816 }, { 909, 878 }, { 940, 909 },
+ { 971, 940 }, { 414, 383 }, { 445, 414 }, { 476, 445 },
+ { 507, 476 }, { 538, 507 }, { 569, 538 }, { 600, 569 },
+ { 631, 600 }, { 662, 631 }, { 693, 662 }, { 724, 693 },
+ { 755, 724 }, { 786, 755 }, { 817, 786 }, { 848, 817 },
+ { 879, 848 }, { 910, 879 }, { 941, 910 }, { 972, 941 },
+ { 1003, 972 }, { 446, 415 }, { 477, 446 }, { 508, 477 },
+ { 570, 539 }, { 601, 570 }, { 632, 601 }, { 694, 663 },
+ { 725, 694 }, { 756, 725 }, { 818, 787 }, { 849, 818 },
+ { 880, 849 }, { 942, 911 }, { 973, 942 }, { 1004, 973 },
+ { 478, 447 }, { 509, 478 }, { 602, 571 }, { 633, 602 },
+ { 726, 695 }, { 757, 726 }, { 850, 819 }, { 881, 850 },
+ { 974, 943 }, { 1005, 974 }, { 510, 479 }, { 634, 603 },
+ { 758, 727 }, { 882, 851 }, { 1006, 975 }, { 539, 508 },
+ { 663, 632 }, { 787, 756 }, { 911, 880 }, { 540, 509 },
+ { 571, 540 }, { 664, 633 }, { 695, 664 }, { 788, 757 },
+ { 819, 788 }, { 912, 881 }, { 943, 912 }, { 541, 510 },
+ { 572, 541 }, { 603, 572 }, { 665, 634 }, { 696, 665 },
+ { 727, 696 }, { 789, 758 }, { 820, 789 }, { 851, 820 },
+ { 913, 882 }, { 944, 913 }, { 975, 944 }, { 542, 511 },
+ { 573, 542 }, { 604, 573 }, { 635, 604 }, { 666, 635 },
+ { 697, 666 }, { 728, 697 }, { 759, 728 }, { 790, 759 },
+ { 821, 790 }, { 852, 821 }, { 883, 852 }, { 914, 883 },
+ { 945, 914 }, { 976, 945 }, { 1007, 976 }, { 574, 543 },
+ { 605, 574 }, { 636, 605 }, { 698, 667 }, { 729, 698 },
+ { 760, 729 }, { 822, 791 }, { 853, 822 }, { 884, 853 },
+ { 946, 915 }, { 977, 946 }, { 1008, 977 }, { 606, 575 },
+ { 637, 606 }, { 730, 699 }, { 761, 730 }, { 854, 823 },
+ { 885, 854 }, { 978, 947 }, { 1009, 978 }, { 638, 607 },
+ { 762, 731 }, { 886, 855 }, { 1010, 979 }, { 667, 636 },
+ { 791, 760 }, { 915, 884 }, { 668, 637 }, { 699, 668 },
+ { 792, 761 }, { 823, 792 }, { 916, 885 }, { 947, 916 },
+ { 669, 638 }, { 700, 669 }, { 731, 700 }, { 793, 762 },
+ { 824, 793 }, { 855, 824 }, { 917, 886 }, { 948, 917 },
+ { 979, 948 }, { 670, 639 }, { 701, 670 }, { 732, 701 },
+ { 763, 732 }, { 794, 763 }, { 825, 794 }, { 856, 825 },
+ { 887, 856 }, { 918, 887 }, { 949, 918 }, { 980, 949 },
+ { 1011, 980 }, { 702, 671 }, { 733, 702 }, { 764, 733 },
+ { 826, 795 }, { 857, 826 }, { 888, 857 }, { 950, 919 },
+ { 981, 950 }, { 1012, 981 }, { 734, 703 }, { 765, 734 },
+ { 858, 827 }, { 889, 858 }, { 982, 951 }, { 1013, 982 },
+ { 766, 735 }, { 890, 859 }, { 1014, 983 }, { 795, 764 },
+ { 919, 888 }, { 796, 765 }, { 827, 796 }, { 920, 889 },
+ { 951, 920 }, { 797, 766 }, { 828, 797 }, { 859, 828 },
+ { 921, 890 }, { 952, 921 }, { 983, 952 }, { 798, 767 },
+ { 829, 798 }, { 860, 829 }, { 891, 860 }, { 922, 891 },
+ { 953, 922 }, { 984, 953 }, { 1015, 984 }, { 830, 799 },
+ { 861, 830 }, { 892, 861 }, { 954, 923 }, { 985, 954 },
+ { 1016, 985 }, { 862, 831 }, { 893, 862 }, { 986, 955 },
+ { 1017, 986 }, { 894, 863 }, { 1018, 987 }, { 923, 892 },
+ { 924, 893 }, { 955, 924 }, { 925, 894 }, { 956, 925 },
+ { 987, 956 }, { 926, 895 }, { 957, 926 }, { 988, 957 },
+ { 1019, 988 }, { 958, 927 }, { 989, 958 }, { 1020, 989 },
+ { 990, 959 }, { 1021, 990 }, { 1022, 991 }, { 0, 0 },
+};
+
+/*
+ * Neighbor-table lookup mirroring vp9_scans above: same [tx size][scan
+ * index] indexing, but each entry points to the matching *_nb table of
+ * per-position neighbor pairs.  32x32 and lossless only use the default
+ * variant, exactly as in vp9_scans.
+ */
+static const int16_t (* const vp9_scans_nb[5][4])[2] = {
+ {
+ vp9_default_scan_4x4_nb, vp9_col_scan_4x4_nb,
+ vp9_row_scan_4x4_nb, vp9_default_scan_4x4_nb
+ }, {
+ vp9_default_scan_8x8_nb, vp9_col_scan_8x8_nb,
+ vp9_row_scan_8x8_nb, vp9_default_scan_8x8_nb
+ }, {
+ vp9_default_scan_16x16_nb, vp9_col_scan_16x16_nb,
+ vp9_row_scan_16x16_nb, vp9_default_scan_16x16_nb
+ }, {
+ vp9_default_scan_32x32_nb, vp9_default_scan_32x32_nb,
+ vp9_default_scan_32x32_nb, vp9_default_scan_32x32_nb
+ }, { // lossless
+ vp9_default_scan_4x4_nb, vp9_default_scan_4x4_nb,
+ vp9_default_scan_4x4_nb, vp9_default_scan_4x4_nb
+ }
+};
+
+static const uint8_t vp9_model_pareto8[256][8] = {
+ { 6, 86, 128, 11, 87, 42, 91, 52 },
+ { 3, 86, 128, 6, 86, 23, 88, 29 },
+ { 6, 86, 128, 11, 87, 42, 91, 52 },
+ { 9, 86, 129, 17, 88, 61, 94, 76 },
+ { 12, 86, 129, 22, 88, 77, 97, 93 },
+ { 15, 87, 129, 28, 89, 93, 100, 110 },
+ { 17, 87, 129, 33, 90, 105, 103, 123 },
+ { 20, 88, 130, 38, 91, 118, 106, 136 },
+ { 23, 88, 130, 43, 91, 128, 108, 146 },
+ { 26, 89, 131, 48, 92, 139, 111, 156 },
+ { 28, 89, 131, 53, 93, 147, 114, 163 },
+ { 31, 90, 131, 58, 94, 156, 117, 171 },
+ { 34, 90, 131, 62, 94, 163, 119, 177 },
+ { 37, 90, 132, 66, 95, 171, 122, 184 },
+ { 39, 90, 132, 70, 96, 177, 124, 189 },
+ { 42, 91, 132, 75, 97, 183, 127, 194 },
+ { 44, 91, 132, 79, 97, 188, 129, 198 },
+ { 47, 92, 133, 83, 98, 193, 132, 202 },
+ { 49, 92, 133, 86, 99, 197, 134, 205 },
+ { 52, 93, 133, 90, 100, 201, 137, 208 },
+ { 54, 93, 133, 94, 100, 204, 139, 211 },
+ { 57, 94, 134, 98, 101, 208, 142, 214 },
+ { 59, 94, 134, 101, 102, 211, 144, 216 },
+ { 62, 94, 135, 105, 103, 214, 146, 218 },
+ { 64, 94, 135, 108, 103, 216, 148, 220 },
+ { 66, 95, 135, 111, 104, 219, 151, 222 },
+ { 68, 95, 135, 114, 105, 221, 153, 223 },
+ { 71, 96, 136, 117, 106, 224, 155, 225 },
+ { 73, 96, 136, 120, 106, 225, 157, 226 },
+ { 76, 97, 136, 123, 107, 227, 159, 228 },
+ { 78, 97, 136, 126, 108, 229, 160, 229 },
+ { 80, 98, 137, 129, 109, 231, 162, 231 },
+ { 82, 98, 137, 131, 109, 232, 164, 232 },
+ { 84, 98, 138, 134, 110, 234, 166, 233 },
+ { 86, 98, 138, 137, 111, 235, 168, 234 },
+ { 89, 99, 138, 140, 112, 236, 170, 235 },
+ { 91, 99, 138, 142, 112, 237, 171, 235 },
+ { 93, 100, 139, 145, 113, 238, 173, 236 },
+ { 95, 100, 139, 147, 114, 239, 174, 237 },
+ { 97, 101, 140, 149, 115, 240, 176, 238 },
+ { 99, 101, 140, 151, 115, 241, 177, 238 },
+ { 101, 102, 140, 154, 116, 242, 179, 239 },
+ { 103, 102, 140, 156, 117, 242, 180, 239 },
+ { 105, 103, 141, 158, 118, 243, 182, 240 },
+ { 107, 103, 141, 160, 118, 243, 183, 240 },
+ { 109, 104, 141, 162, 119, 244, 185, 241 },
+ { 111, 104, 141, 164, 119, 244, 186, 241 },
+ { 113, 104, 142, 166, 120, 245, 187, 242 },
+ { 114, 104, 142, 168, 121, 245, 188, 242 },
+ { 116, 105, 143, 170, 122, 246, 190, 243 },
+ { 118, 105, 143, 171, 122, 246, 191, 243 },
+ { 120, 106, 143, 173, 123, 247, 192, 244 },
+ { 121, 106, 143, 175, 124, 247, 193, 244 },
+ { 123, 107, 144, 177, 125, 248, 195, 244 },
+ { 125, 107, 144, 178, 125, 248, 196, 244 },
+ { 127, 108, 145, 180, 126, 249, 197, 245 },
+ { 128, 108, 145, 181, 127, 249, 198, 245 },
+ { 130, 109, 145, 183, 128, 249, 199, 245 },
+ { 132, 109, 145, 184, 128, 249, 200, 245 },
+ { 134, 110, 146, 186, 129, 250, 201, 246 },
+ { 135, 110, 146, 187, 130, 250, 202, 246 },
+ { 137, 111, 147, 189, 131, 251, 203, 246 },
+ { 138, 111, 147, 190, 131, 251, 204, 246 },
+ { 140, 112, 147, 192, 132, 251, 205, 247 },
+ { 141, 112, 147, 193, 132, 251, 206, 247 },
+ { 143, 113, 148, 194, 133, 251, 207, 247 },
+ { 144, 113, 148, 195, 134, 251, 207, 247 },
+ { 146, 114, 149, 197, 135, 252, 208, 248 },
+ { 147, 114, 149, 198, 135, 252, 209, 248 },
+ { 149, 115, 149, 199, 136, 252, 210, 248 },
+ { 150, 115, 149, 200, 137, 252, 210, 248 },
+ { 152, 115, 150, 201, 138, 252, 211, 248 },
+ { 153, 115, 150, 202, 138, 252, 212, 248 },
+ { 155, 116, 151, 204, 139, 253, 213, 249 },
+ { 156, 116, 151, 205, 139, 253, 213, 249 },
+ { 158, 117, 151, 206, 140, 253, 214, 249 },
+ { 159, 117, 151, 207, 141, 253, 215, 249 },
+ { 161, 118, 152, 208, 142, 253, 216, 249 },
+ { 162, 118, 152, 209, 142, 253, 216, 249 },
+ { 163, 119, 153, 210, 143, 253, 217, 249 },
+ { 164, 119, 153, 211, 143, 253, 217, 249 },
+ { 166, 120, 153, 212, 144, 254, 218, 250 },
+ { 167, 120, 153, 212, 145, 254, 219, 250 },
+ { 168, 121, 154, 213, 146, 254, 220, 250 },
+ { 169, 121, 154, 214, 146, 254, 220, 250 },
+ { 171, 122, 155, 215, 147, 254, 221, 250 },
+ { 172, 122, 155, 216, 147, 254, 221, 250 },
+ { 173, 123, 155, 217, 148, 254, 222, 250 },
+ { 174, 123, 155, 217, 149, 254, 222, 250 },
+ { 176, 124, 156, 218, 150, 254, 223, 250 },
+ { 177, 124, 156, 219, 150, 254, 223, 250 },
+ { 178, 125, 157, 220, 151, 254, 224, 251 },
+ { 179, 125, 157, 220, 151, 254, 224, 251 },
+ { 180, 126, 157, 221, 152, 254, 225, 251 },
+ { 181, 126, 157, 221, 152, 254, 225, 251 },
+ { 183, 127, 158, 222, 153, 254, 226, 251 },
+ { 184, 127, 158, 223, 154, 254, 226, 251 },
+ { 185, 128, 159, 224, 155, 255, 227, 251 },
+ { 186, 128, 159, 224, 155, 255, 227, 251 },
+ { 187, 129, 160, 225, 156, 255, 228, 251 },
+ { 188, 130, 160, 225, 156, 255, 228, 251 },
+ { 189, 131, 160, 226, 157, 255, 228, 251 },
+ { 190, 131, 160, 226, 158, 255, 228, 251 },
+ { 191, 132, 161, 227, 159, 255, 229, 251 },
+ { 192, 132, 161, 227, 159, 255, 229, 251 },
+ { 193, 133, 162, 228, 160, 255, 230, 252 },
+ { 194, 133, 162, 229, 160, 255, 230, 252 },
+ { 195, 134, 163, 230, 161, 255, 231, 252 },
+ { 196, 134, 163, 230, 161, 255, 231, 252 },
+ { 197, 135, 163, 231, 162, 255, 231, 252 },
+ { 198, 135, 163, 231, 162, 255, 231, 252 },
+ { 199, 136, 164, 232, 163, 255, 232, 252 },
+ { 200, 136, 164, 232, 164, 255, 232, 252 },
+ { 201, 137, 165, 233, 165, 255, 233, 252 },
+ { 201, 137, 165, 233, 165, 255, 233, 252 },
+ { 202, 138, 166, 233, 166, 255, 233, 252 },
+ { 203, 138, 166, 233, 166, 255, 233, 252 },
+ { 204, 139, 166, 234, 167, 255, 234, 252 },
+ { 205, 139, 166, 234, 167, 255, 234, 252 },
+ { 206, 140, 167, 235, 168, 255, 235, 252 },
+ { 206, 140, 167, 235, 168, 255, 235, 252 },
+ { 207, 141, 168, 236, 169, 255, 235, 252 },
+ { 208, 141, 168, 236, 170, 255, 235, 252 },
+ { 209, 142, 169, 237, 171, 255, 236, 252 },
+ { 209, 143, 169, 237, 171, 255, 236, 252 },
+ { 210, 144, 169, 237, 172, 255, 236, 252 },
+ { 211, 144, 169, 237, 172, 255, 236, 252 },
+ { 212, 145, 170, 238, 173, 255, 237, 252 },
+ { 213, 145, 170, 238, 173, 255, 237, 252 },
+ { 214, 146, 171, 239, 174, 255, 237, 253 },
+ { 214, 146, 171, 239, 174, 255, 237, 253 },
+ { 215, 147, 172, 240, 175, 255, 238, 253 },
+ { 215, 147, 172, 240, 175, 255, 238, 253 },
+ { 216, 148, 173, 240, 176, 255, 238, 253 },
+ { 217, 148, 173, 240, 176, 255, 238, 253 },
+ { 218, 149, 173, 241, 177, 255, 239, 253 },
+ { 218, 149, 173, 241, 178, 255, 239, 253 },
+ { 219, 150, 174, 241, 179, 255, 239, 253 },
+ { 219, 151, 174, 241, 179, 255, 239, 253 },
+ { 220, 152, 175, 242, 180, 255, 240, 253 },
+ { 221, 152, 175, 242, 180, 255, 240, 253 },
+ { 222, 153, 176, 242, 181, 255, 240, 253 },
+ { 222, 153, 176, 242, 181, 255, 240, 253 },
+ { 223, 154, 177, 243, 182, 255, 240, 253 },
+ { 223, 154, 177, 243, 182, 255, 240, 253 },
+ { 224, 155, 178, 244, 183, 255, 241, 253 },
+ { 224, 155, 178, 244, 183, 255, 241, 253 },
+ { 225, 156, 178, 244, 184, 255, 241, 253 },
+ { 225, 157, 178, 244, 184, 255, 241, 253 },
+ { 226, 158, 179, 244, 185, 255, 242, 253 },
+ { 227, 158, 179, 244, 185, 255, 242, 253 },
+ { 228, 159, 180, 245, 186, 255, 242, 253 },
+ { 228, 159, 180, 245, 186, 255, 242, 253 },
+ { 229, 160, 181, 245, 187, 255, 242, 253 },
+ { 229, 160, 181, 245, 187, 255, 242, 253 },
+ { 230, 161, 182, 246, 188, 255, 243, 253 },
+ { 230, 162, 182, 246, 188, 255, 243, 253 },
+ { 231, 163, 183, 246, 189, 255, 243, 253 },
+ { 231, 163, 183, 246, 189, 255, 243, 253 },
+ { 232, 164, 184, 247, 190, 255, 243, 253 },
+ { 232, 164, 184, 247, 190, 255, 243, 253 },
+ { 233, 165, 185, 247, 191, 255, 244, 253 },
+ { 233, 165, 185, 247, 191, 255, 244, 253 },
+ { 234, 166, 185, 247, 192, 255, 244, 253 },
+ { 234, 167, 185, 247, 192, 255, 244, 253 },
+ { 235, 168, 186, 248, 193, 255, 244, 253 },
+ { 235, 168, 186, 248, 193, 255, 244, 253 },
+ { 236, 169, 187, 248, 194, 255, 244, 253 },
+ { 236, 169, 187, 248, 194, 255, 244, 253 },
+ { 236, 170, 188, 248, 195, 255, 245, 253 },
+ { 236, 170, 188, 248, 195, 255, 245, 253 },
+ { 237, 171, 189, 249, 196, 255, 245, 254 },
+ { 237, 172, 189, 249, 196, 255, 245, 254 },
+ { 238, 173, 190, 249, 197, 255, 245, 254 },
+ { 238, 173, 190, 249, 197, 255, 245, 254 },
+ { 239, 174, 191, 249, 198, 255, 245, 254 },
+ { 239, 174, 191, 249, 198, 255, 245, 254 },
+ { 240, 175, 192, 249, 199, 255, 246, 254 },
+ { 240, 176, 192, 249, 199, 255, 246, 254 },
+ { 240, 177, 193, 250, 200, 255, 246, 254 },
+ { 240, 177, 193, 250, 200, 255, 246, 254 },
+ { 241, 178, 194, 250, 201, 255, 246, 254 },
+ { 241, 178, 194, 250, 201, 255, 246, 254 },
+ { 242, 179, 195, 250, 202, 255, 246, 254 },
+ { 242, 180, 195, 250, 202, 255, 246, 254 },
+ { 242, 181, 196, 250, 203, 255, 247, 254 },
+ { 242, 181, 196, 250, 203, 255, 247, 254 },
+ { 243, 182, 197, 251, 204, 255, 247, 254 },
+ { 243, 183, 197, 251, 204, 255, 247, 254 },
+ { 244, 184, 198, 251, 205, 255, 247, 254 },
+ { 244, 184, 198, 251, 205, 255, 247, 254 },
+ { 244, 185, 199, 251, 206, 255, 247, 254 },
+ { 244, 185, 199, 251, 206, 255, 247, 254 },
+ { 245, 186, 200, 251, 207, 255, 247, 254 },
+ { 245, 187, 200, 251, 207, 255, 247, 254 },
+ { 246, 188, 201, 252, 207, 255, 248, 254 },
+ { 246, 188, 201, 252, 207, 255, 248, 254 },
+ { 246, 189, 202, 252, 208, 255, 248, 254 },
+ { 246, 190, 202, 252, 208, 255, 248, 254 },
+ { 247, 191, 203, 252, 209, 255, 248, 254 },
+ { 247, 191, 203, 252, 209, 255, 248, 254 },
+ { 247, 192, 204, 252, 210, 255, 248, 254 },
+ { 247, 193, 204, 252, 210, 255, 248, 254 },
+ { 248, 194, 205, 252, 211, 255, 248, 254 },
+ { 248, 194, 205, 252, 211, 255, 248, 254 },
+ { 248, 195, 206, 252, 212, 255, 249, 254 },
+ { 248, 196, 206, 252, 212, 255, 249, 254 },
+ { 249, 197, 207, 253, 213, 255, 249, 254 },
+ { 249, 197, 207, 253, 213, 255, 249, 254 },
+ { 249, 198, 208, 253, 214, 255, 249, 254 },
+ { 249, 199, 209, 253, 214, 255, 249, 254 },
+ { 250, 200, 210, 253, 215, 255, 249, 254 },
+ { 250, 200, 210, 253, 215, 255, 249, 254 },
+ { 250, 201, 211, 253, 215, 255, 249, 254 },
+ { 250, 202, 211, 253, 215, 255, 249, 254 },
+ { 250, 203, 212, 253, 216, 255, 249, 254 },
+ { 250, 203, 212, 253, 216, 255, 249, 254 },
+ { 251, 204, 213, 253, 217, 255, 250, 254 },
+ { 251, 205, 213, 253, 217, 255, 250, 254 },
+ { 251, 206, 214, 254, 218, 255, 250, 254 },
+ { 251, 206, 215, 254, 218, 255, 250, 254 },
+ { 252, 207, 216, 254, 219, 255, 250, 254 },
+ { 252, 208, 216, 254, 219, 255, 250, 254 },
+ { 252, 209, 217, 254, 220, 255, 250, 254 },
+ { 252, 210, 217, 254, 220, 255, 250, 254 },
+ { 252, 211, 218, 254, 221, 255, 250, 254 },
+ { 252, 212, 218, 254, 221, 255, 250, 254 },
+ { 253, 213, 219, 254, 222, 255, 250, 254 },
+ { 253, 213, 220, 254, 222, 255, 250, 254 },
+ { 253, 214, 221, 254, 223, 255, 250, 254 },
+ { 253, 215, 221, 254, 223, 255, 250, 254 },
+ { 253, 216, 222, 254, 224, 255, 251, 254 },
+ { 253, 217, 223, 254, 224, 255, 251, 254 },
+ { 253, 218, 224, 254, 225, 255, 251, 254 },
+ { 253, 219, 224, 254, 225, 255, 251, 254 },
+ { 254, 220, 225, 254, 225, 255, 251, 254 },
+ { 254, 221, 226, 254, 225, 255, 251, 254 },
+ { 254, 222, 227, 255, 226, 255, 251, 254 },
+ { 254, 223, 227, 255, 226, 255, 251, 254 },
+ { 254, 224, 228, 255, 227, 255, 251, 254 },
+ { 254, 225, 229, 255, 227, 255, 251, 254 },
+ { 254, 226, 230, 255, 228, 255, 251, 254 },
+ { 254, 227, 230, 255, 229, 255, 251, 254 },
+ { 255, 228, 231, 255, 230, 255, 251, 254 },
+ { 255, 229, 232, 255, 230, 255, 251, 254 },
+ { 255, 230, 233, 255, 231, 255, 252, 254 },
+ { 255, 231, 234, 255, 231, 255, 252, 254 },
+ { 255, 232, 235, 255, 232, 255, 252, 254 },
+ { 255, 233, 236, 255, 232, 255, 252, 254 },
+ { 255, 235, 237, 255, 233, 255, 252, 254 },
+ { 255, 236, 238, 255, 234, 255, 252, 254 },
+ { 255, 238, 240, 255, 235, 255, 252, 255 },
+ { 255, 239, 241, 255, 235, 255, 252, 254 },
+ { 255, 241, 243, 255, 236, 255, 252, 254 },
+ { 255, 243, 245, 255, 237, 255, 252, 254 },
+ { 255, 246, 247, 255, 239, 255, 253, 255 },
+};
+
+typedef struct {
+ uint8_t y_mode[4][9];
+ uint8_t uv_mode[10][9];
+ uint8_t filter[4][2];
+ uint8_t mv_mode[7][3];
+ uint8_t intra[4];
+ uint8_t comp[5];
+ uint8_t single_ref[5][2];
+ uint8_t comp_ref[5];
+ uint8_t tx32p[2][3];
+ uint8_t tx16p[2][2];
+ uint8_t tx8p[2];
+ uint8_t skip[3];
+ uint8_t mv_joint[3];
+ struct {
+ uint8_t sign;
+ uint8_t classes[10];
+ uint8_t class0;
+ uint8_t bits[10];
+ uint8_t class0_fp[2][3];
+ uint8_t fp[3];
+ uint8_t class0_hp;
+ uint8_t hp;
+ } mv_comp[2];
+ uint8_t partition[4][4][3];
+} prob_context;
+
+static const prob_context vp9_default_probs = {
+ { /* y_mode */
+ { 65, 32, 18, 144, 162, 194, 41, 51, 98 } /* bsize < 8x8 */,
+ { 132, 68, 18, 165, 217, 196, 45, 40, 78 } /* bsize < 16x16 */,
+ { 173, 80, 19, 176, 240, 193, 64, 35, 46 } /* bsize < 32x32 */,
+ { 221, 135, 38, 194, 248, 121, 96, 85, 29 } /* bsize >= 32x32 */
+ }, { /* uv_mode */
+ { 48, 12, 154, 155, 139, 90, 34, 117, 119 } /* y = v */,
+ { 67, 6, 25, 204, 243, 158, 13, 21, 96 } /* y = h */,
+ { 120, 7, 76, 176, 208, 126, 28, 54, 103 } /* y = dc */,
+ { 97, 5, 44, 131, 176, 139, 48, 68, 97 } /* y = d45 */,
+ { 83, 5, 42, 156, 111, 152, 26, 49, 152 } /* y = d135 */,
+ { 80, 5, 58, 178, 74, 83, 33, 62, 145 } /* y = d117 */,
+ { 86, 5, 32, 154, 192, 168, 14, 22, 163 } /* y = d153 */,
+ { 77, 7, 64, 116, 132, 122, 37, 126, 120 } /* y = d63 */,
+ { 85, 5, 32, 156, 216, 148, 19, 29, 73 } /* y = d27 */,
+ { 101, 21, 107, 181, 192, 103, 19, 67, 125 } /* y = tm */
+ }, { /* filter */
+ { 235, 162, },
+ { 36, 255, },
+ { 34, 3, },
+ { 149, 144, },
+ }, { /* mv_mode */
+ { 2, 173, 34}, // 0 = both zero mv
+ { 7, 145, 85}, // 1 = one zero mv + one a predicted mv
+ { 7, 166, 63}, // 2 = two predicted mvs
+ { 7, 94, 66}, // 3 = one predicted/zero and one new mv
+ { 8, 64, 46}, // 4 = two new mvs
+ { 17, 81, 31}, // 5 = one intra neighbour + x
+ { 25, 29, 30}, // 6 = two intra neighbours
+ }, { /* intra */
+ 9, 102, 187, 225
+ }, { /* comp */
+ 239, 183, 119, 96, 41
+ }, { /* single_ref */
+ { 33, 16 },
+ { 77, 74 },
+ { 142, 142 },
+ { 172, 170 },
+ { 238, 247 }
+ }, { /* comp_ref */
+ 50, 126, 123, 221, 226
+ }, { /* tx32p */
+ { 3, 136, 37, },
+ { 5, 52, 13, },
+ }, { /* tx16p */
+ { 20, 152, },
+ { 15, 101, },
+ }, { /* tx8p */
+ 100, 66
+ }, { /* skip */
+ 192, 128, 64
+ }, { /* mv_joint */
+ 32, 64, 96
+ }, {
+ { /* mv vertical component */
+ 128, /* sign */
+ { 224, 144, 192, 168, 192, 176, 192, 198, 198, 245 }, /* class */
+ 216, /* class0 */
+ { 136, 140, 148, 160, 176, 192, 224, 234, 234, 240}, /* bits */
+ { /* class0_fp */
+ { 128, 128, 64 },
+ { 96, 112, 64 }
+ },
+ { 64, 96, 64 }, /* fp */
+ 160, /* class0_hp bit */
+ 128, /* hp */
+ }, { /* mv horizontal component */
+ 128, /* sign */
+ { 216, 128, 176, 160, 176, 176, 192, 198, 198, 208 }, /* class */
+ 208, /* class0 */
+ { 136, 140, 148, 160, 176, 192, 224, 234, 234, 240 }, /* bits */
+ { /* class0_fp */
+ { 128, 128, 64 },
+ { 96, 112, 64 }
+ },
+ { 64, 96, 64 }, /* fp */
+ 160, /* class0_hp bit */
+ 128, /* hp */
+ }
+ }, { /* partition */
+ { /* 64x64 -> 32x32 */
+ { 222, 34, 30 } /* a/l both not split */,
+ { 72, 16, 44 } /* a split, l not split */,
+ { 58, 32, 12 } /* l split, a not split */,
+ { 10, 7, 6 } /* a/l both split */,
+ }, { /* 32x32 -> 16x16 */
+ { 177, 58, 59 } /* a/l both not split */,
+ { 68, 26, 63 } /* a split, l not split */,
+ { 52, 79, 25 } /* l split, a not split */,
+ { 17, 14, 12 } /* a/l both split */,
+ }, { /* 16x16 -> 8x8 */
+ { 174, 73, 87 } /* a/l both not split */,
+ { 92, 41, 83 } /* a split, l not split */,
+ { 82, 99, 50 } /* l split, a not split */,
+ { 53, 39, 39 } /* a/l both split */,
+ }, { /* 8x8 -> 4x4 */
+ { 199, 122, 141 } /* a/l both not split */,
+ { 147, 63, 159 } /* a split, l not split */,
+ { 148, 133, 118 } /* l split, a not split */,
+ { 121, 104, 114 } /* a/l both split */,
+ }
+ },
+};
+
+static const uint8_t vp9_default_coef_probs[4][2][2][6][6][3] = {
+ { /* tx = 4x4 */
+ { /* block Type 0 */
+ { /* Intra */
+ { /* Coeff Band 0 */
+ { 195, 29, 183 },
+ { 84, 49, 136 },
+ { 8, 42, 71 }
+ }, { /* Coeff Band 1 */
+ { 31, 107, 169 },
+ { 35, 99, 159 },
+ { 17, 82, 140 },
+ { 8, 66, 114 },
+ { 2, 44, 76 },
+ { 1, 19, 32 }
+ }, { /* Coeff Band 2 */
+ { 40, 132, 201 },
+ { 29, 114, 187 },
+ { 13, 91, 157 },
+ { 7, 75, 127 },
+ { 3, 58, 95 },
+ { 1, 28, 47 }
+ }, { /* Coeff Band 3 */
+ { 69, 142, 221 },
+ { 42, 122, 201 },
+ { 15, 91, 159 },
+ { 6, 67, 121 },
+ { 1, 42, 77 },
+ { 1, 17, 31 }
+ }, { /* Coeff Band 4 */
+ { 102, 148, 228 },
+ { 67, 117, 204 },
+ { 17, 82, 154 },
+ { 6, 59, 114 },
+ { 2, 39, 75 },
+ { 1, 15, 29 }
+ }, { /* Coeff Band 5 */
+ { 156, 57, 233 },
+ { 119, 57, 212 },
+ { 58, 48, 163 },
+ { 29, 40, 124 },
+ { 12, 30, 81 },
+ { 3, 12, 31 }
+ }
+ }, { /* Inter */
+ { /* Coeff Band 0 */
+ { 191, 107, 226 },
+ { 124, 117, 204 },
+ { 25, 99, 155 }
+ }, { /* Coeff Band 1 */
+ { 29, 148, 210 },
+ { 37, 126, 194 },
+ { 8, 93, 157 },
+ { 2, 68, 118 },
+ { 1, 39, 69 },
+ { 1, 17, 33 }
+ }, { /* Coeff Band 2 */
+ { 41, 151, 213 },
+ { 27, 123, 193 },
+ { 3, 82, 144 },
+ { 1, 58, 105 },
+ { 1, 32, 60 },
+ { 1, 13, 26 }
+ }, { /* Coeff Band 3 */
+ { 59, 159, 220 },
+ { 23, 126, 198 },
+ { 4, 88, 151 },
+ { 1, 66, 114 },
+ { 1, 38, 71 },
+ { 1, 18, 34 }
+ }, { /* Coeff Band 4 */
+ { 114, 136, 232 },
+ { 51, 114, 207 },
+ { 11, 83, 155 },
+ { 3, 56, 105 },
+ { 1, 33, 65 },
+ { 1, 17, 34 }
+ }, { /* Coeff Band 5 */
+ { 149, 65, 234 },
+ { 121, 57, 215 },
+ { 61, 49, 166 },
+ { 28, 36, 114 },
+ { 12, 25, 76 },
+ { 3, 16, 42 }
+ }
+ }
+ }, { /* block Type 1 */
+ { /* Intra */
+ { /* Coeff Band 0 */
+ { 214, 49, 220 },
+ { 132, 63, 188 },
+ { 42, 65, 137 }
+ }, { /* Coeff Band 1 */
+ { 85, 137, 221 },
+ { 104, 131, 216 },
+ { 49, 111, 192 },
+ { 21, 87, 155 },
+ { 2, 49, 87 },
+ { 1, 16, 28 }
+ }, { /* Coeff Band 2 */
+ { 89, 163, 230 },
+ { 90, 137, 220 },
+ { 29, 100, 183 },
+ { 10, 70, 135 },
+ { 2, 42, 81 },
+ { 1, 17, 33 }
+ }, { /* Coeff Band 3 */
+ { 108, 167, 237 },
+ { 55, 133, 222 },
+ { 15, 97, 179 },
+ { 4, 72, 135 },
+ { 1, 45, 85 },
+ { 1, 19, 38 }
+ }, { /* Coeff Band 4 */
+ { 124, 146, 240 },
+ { 66, 124, 224 },
+ { 17, 88, 175 },
+ { 4, 58, 122 },
+ { 1, 36, 75 },
+ { 1, 18, 37 }
+ }, { /* Coeff Band 5 */
+ { 141, 79, 241 },
+ { 126, 70, 227 },
+ { 66, 58, 182 },
+ { 30, 44, 136 },
+ { 12, 34, 96 },
+ { 2, 20, 47 }
+ }
+ }, { /* Inter */
+ { /* Coeff Band 0 */
+ { 229, 99, 249 },
+ { 143, 111, 235 },
+ { 46, 109, 192 }
+ }, { /* Coeff Band 1 */
+ { 82, 158, 236 },
+ { 94, 146, 224 },
+ { 25, 117, 191 },
+ { 9, 87, 149 },
+ { 3, 56, 99 },
+ { 1, 33, 57 }
+ }, { /* Coeff Band 2 */
+ { 83, 167, 237 },
+ { 68, 145, 222 },
+ { 10, 103, 177 },
+ { 2, 72, 131 },
+ { 1, 41, 79 },
+ { 1, 20, 39 }
+ }, { /* Coeff Band 3 */
+ { 99, 167, 239 },
+ { 47, 141, 224 },
+ { 10, 104, 178 },
+ { 2, 73, 133 },
+ { 1, 44, 85 },
+ { 1, 22, 47 }
+ }, { /* Coeff Band 4 */
+ { 127, 145, 243 },
+ { 71, 129, 228 },
+ { 17, 93, 177 },
+ { 3, 61, 124 },
+ { 1, 41, 84 },
+ { 1, 21, 52 }
+ }, { /* Coeff Band 5 */
+ { 157, 78, 244 },
+ { 140, 72, 231 },
+ { 69, 58, 184 },
+ { 31, 44, 137 },
+ { 14, 38, 105 },
+ { 8, 23, 61 }
+ }
+ }
+ }
+ }, { /* tx = 8x8 */
+ { /* block Type 0 */
+ { /* Intra */
+ { /* Coeff Band 0 */
+ { 125, 34, 187 },
+ { 52, 41, 133 },
+ { 6, 31, 56 }
+ }, { /* Coeff Band 1 */
+ { 37, 109, 153 },
+ { 51, 102, 147 },
+ { 23, 87, 128 },
+ { 8, 67, 101 },
+ { 1, 41, 63 },
+ { 1, 19, 29 }
+ }, { /* Coeff Band 2 */
+ { 31, 154, 185 },
+ { 17, 127, 175 },
+ { 6, 96, 145 },
+ { 2, 73, 114 },
+ { 1, 51, 82 },
+ { 1, 28, 45 }
+ }, { /* Coeff Band 3 */
+ { 23, 163, 200 },
+ { 10, 131, 185 },
+ { 2, 93, 148 },
+ { 1, 67, 111 },
+ { 1, 41, 69 },
+ { 1, 14, 24 }
+ }, { /* Coeff Band 4 */
+ { 29, 176, 217 },
+ { 12, 145, 201 },
+ { 3, 101, 156 },
+ { 1, 69, 111 },
+ { 1, 39, 63 },
+ { 1, 14, 23 }
+ }, { /* Coeff Band 5 */
+ { 57, 192, 233 },
+ { 25, 154, 215 },
+ { 6, 109, 167 },
+ { 3, 78, 118 },
+ { 1, 48, 69 },
+ { 1, 21, 29 }
+ }
+ }, { /* Inter */
+ { /* Coeff Band 0 */
+ { 202, 105, 245 },
+ { 108, 106, 216 },
+ { 18, 90, 144 }
+ }, { /* Coeff Band 1 */
+ { 33, 172, 219 },
+ { 64, 149, 206 },
+ { 14, 117, 177 },
+ { 5, 90, 141 },
+ { 2, 61, 95 },
+ { 1, 37, 57 }
+ }, { /* Coeff Band 2 */
+ { 33, 179, 220 },
+ { 11, 140, 198 },
+ { 1, 89, 148 },
+ { 1, 60, 104 },
+ { 1, 33, 57 },
+ { 1, 12, 21 }
+ }, { /* Coeff Band 3 */
+ { 30, 181, 221 },
+ { 8, 141, 198 },
+ { 1, 87, 145 },
+ { 1, 58, 100 },
+ { 1, 31, 55 },
+ { 1, 12, 20 }
+ }, { /* Coeff Band 4 */
+ { 32, 186, 224 },
+ { 7, 142, 198 },
+ { 1, 86, 143 },
+ { 1, 58, 100 },
+ { 1, 31, 55 },
+ { 1, 12, 22 }
+ }, { /* Coeff Band 5 */
+ { 57, 192, 227 },
+ { 20, 143, 204 },
+ { 3, 96, 154 },
+ { 1, 68, 112 },
+ { 1, 42, 69 },
+ { 1, 19, 32 }
+ }
+ }
+ }, { /* block Type 1 */
+ { /* Intra */
+ { /* Coeff Band 0 */
+ { 212, 35, 215 },
+ { 113, 47, 169 },
+ { 29, 48, 105 }
+ }, { /* Coeff Band 1 */
+ { 74, 129, 203 },
+ { 106, 120, 203 },
+ { 49, 107, 178 },
+ { 19, 84, 144 },
+ { 4, 50, 84 },
+ { 1, 15, 25 }
+ }, { /* Coeff Band 2 */
+ { 71, 172, 217 },
+ { 44, 141, 209 },
+ { 15, 102, 173 },
+ { 6, 76, 133 },
+ { 2, 51, 89 },
+ { 1, 24, 42 }
+ }, { /* Coeff Band 3 */
+ { 64, 185, 231 },
+ { 31, 148, 216 },
+ { 8, 103, 175 },
+ { 3, 74, 131 },
+ { 1, 46, 81 },
+ { 1, 18, 30 }
+ }, { /* Coeff Band 4 */
+ { 65, 196, 235 },
+ { 25, 157, 221 },
+ { 5, 105, 174 },
+ { 1, 67, 120 },
+ { 1, 38, 69 },
+ { 1, 15, 30 }
+ }, { /* Coeff Band 5 */
+ { 65, 204, 238 },
+ { 30, 156, 224 },
+ { 7, 107, 177 },
+ { 2, 70, 124 },
+ { 1, 42, 73 },
+ { 1, 18, 34 }
+ }
+ }, { /* Inter */
+ { /* Coeff Band 0 */
+ { 225, 86, 251 },
+ { 144, 104, 235 },
+ { 42, 99, 181 }
+ }, { /* Coeff Band 1 */
+ { 85, 175, 239 },
+ { 112, 165, 229 },
+ { 29, 136, 200 },
+ { 12, 103, 162 },
+ { 6, 77, 123 },
+ { 2, 53, 84 }
+ }, { /* Coeff Band 2 */
+ { 75, 183, 239 },
+ { 30, 155, 221 },
+ { 3, 106, 171 },
+ { 1, 74, 128 },
+ { 1, 44, 76 },
+ { 1, 17, 28 }
+ }, { /* Coeff Band 3 */
+ { 73, 185, 240 },
+ { 27, 159, 222 },
+ { 2, 107, 172 },
+ { 1, 75, 127 },
+ { 1, 42, 73 },
+ { 1, 17, 29 }
+ }, { /* Coeff Band 4 */
+ { 62, 190, 238 },
+ { 21, 159, 222 },
+ { 2, 107, 172 },
+ { 1, 72, 122 },
+ { 1, 40, 71 },
+ { 1, 18, 32 }
+ }, { /* Coeff Band 5 */
+ { 61, 199, 240 },
+ { 27, 161, 226 },
+ { 4, 113, 180 },
+ { 1, 76, 129 },
+ { 1, 46, 80 },
+ { 1, 23, 41 }
+ }
+ }
+ }
+ }, { /* tx = 16x16 */
+ { /* block Type 0 */
+ { /* Intra */
+ { /* Coeff Band 0 */
+ { 7, 27, 153 },
+ { 5, 30, 95 },
+ { 1, 16, 30 }
+ }, { /* Coeff Band 1 */
+ { 50, 75, 127 },
+ { 57, 75, 124 },
+ { 27, 67, 108 },
+ { 10, 54, 86 },
+ { 1, 33, 52 },
+ { 1, 12, 18 }
+ }, { /* Coeff Band 2 */
+ { 43, 125, 151 },
+ { 26, 108, 148 },
+ { 7, 83, 122 },
+ { 2, 59, 89 },
+ { 1, 38, 60 },
+ { 1, 17, 27 }
+ }, { /* Coeff Band 3 */
+ { 23, 144, 163 },
+ { 13, 112, 154 },
+ { 2, 75, 117 },
+ { 1, 50, 81 },
+ { 1, 31, 51 },
+ { 1, 14, 23 }
+ }, { /* Coeff Band 4 */
+ { 18, 162, 185 },
+ { 6, 123, 171 },
+ { 1, 78, 125 },
+ { 1, 51, 86 },
+ { 1, 31, 54 },
+ { 1, 14, 23 }
+ }, { /* Coeff Band 5 */
+ { 15, 199, 227 },
+ { 3, 150, 204 },
+ { 1, 91, 146 },
+ { 1, 55, 95 },
+ { 1, 30, 53 },
+ { 1, 11, 20 }
+ }
+ }, { /* Inter */
+ { /* Coeff Band 0 */
+ { 19, 55, 240 },
+ { 19, 59, 196 },
+ { 3, 52, 105 }
+ }, { /* Coeff Band 1 */
+ { 41, 166, 207 },
+ { 104, 153, 199 },
+ { 31, 123, 181 },
+ { 14, 101, 152 },
+ { 5, 72, 106 },
+ { 1, 36, 52 }
+ }, { /* Coeff Band 2 */
+ { 35, 176, 211 },
+ { 12, 131, 190 },
+ { 2, 88, 144 },
+ { 1, 60, 101 },
+ { 1, 36, 60 },
+ { 1, 16, 28 }
+ }, { /* Coeff Band 3 */
+ { 28, 183, 213 },
+ { 8, 134, 191 },
+ { 1, 86, 142 },
+ { 1, 56, 96 },
+ { 1, 30, 53 },
+ { 1, 12, 20 }
+ }, { /* Coeff Band 4 */
+ { 20, 190, 215 },
+ { 4, 135, 192 },
+ { 1, 84, 139 },
+ { 1, 53, 91 },
+ { 1, 28, 49 },
+ { 1, 11, 20 }
+ }, { /* Coeff Band 5 */
+ { 13, 196, 216 },
+ { 2, 137, 192 },
+ { 1, 86, 143 },
+ { 1, 57, 99 },
+ { 1, 32, 56 },
+ { 1, 13, 24 }
+ }
+ }
+ }, { /* block Type 1 */
+ { /* Intra */
+ { /* Coeff Band 0 */
+ { 211, 29, 217 },
+ { 96, 47, 156 },
+ { 22, 43, 87 }
+ }, { /* Coeff Band 1 */
+ { 78, 120, 193 },
+ { 111, 116, 186 },
+ { 46, 102, 164 },
+ { 15, 80, 128 },
+ { 2, 49, 76 },
+ { 1, 18, 28 }
+ }, { /* Coeff Band 2 */
+ { 71, 161, 203 },
+ { 42, 132, 192 },
+ { 10, 98, 150 },
+ { 3, 69, 109 },
+ { 1, 44, 70 },
+ { 1, 18, 29 }
+ }, { /* Coeff Band 3 */
+ { 57, 186, 211 },
+ { 30, 140, 196 },
+ { 4, 93, 146 },
+ { 1, 62, 102 },
+ { 1, 38, 65 },
+ { 1, 16, 27 }
+ }, { /* Coeff Band 4 */
+ { 47, 199, 217 },
+ { 14, 145, 196 },
+ { 1, 88, 142 },
+ { 1, 57, 98 },
+ { 1, 36, 62 },
+ { 1, 15, 26 }
+ }, { /* Coeff Band 5 */
+ { 26, 219, 229 },
+ { 5, 155, 207 },
+ { 1, 94, 151 },
+ { 1, 60, 104 },
+ { 1, 36, 62 },
+ { 1, 16, 28 }
+ }
+ }, { /* Inter */
+ { /* Coeff Band 0 */
+ { 233, 29, 248 },
+ { 146, 47, 220 },
+ { 43, 52, 140 }
+ }, { /* Coeff Band 1 */
+ { 100, 163, 232 },
+ { 179, 161, 222 },
+ { 63, 142, 204 },
+ { 37, 113, 174 },
+ { 26, 89, 137 },
+ { 18, 68, 97 }
+ }, { /* Coeff Band 2 */
+ { 85, 181, 230 },
+ { 32, 146, 209 },
+ { 7, 100, 164 },
+ { 3, 71, 121 },
+ { 1, 45, 77 },
+ { 1, 18, 30 }
+ }, { /* Coeff Band 3 */
+ { 65, 187, 230 },
+ { 20, 148, 207 },
+ { 2, 97, 159 },
+ { 1, 68, 116 },
+ { 1, 40, 70 },
+ { 1, 14, 29 }
+ }, { /* Coeff Band 4 */
+ { 40, 194, 227 },
+ { 8, 147, 204 },
+ { 1, 94, 155 },
+ { 1, 65, 112 },
+ { 1, 39, 66 },
+ { 1, 14, 26 }
+ }, { /* Coeff Band 5 */
+ { 16, 208, 228 },
+ { 3, 151, 207 },
+ { 1, 98, 160 },
+ { 1, 67, 117 },
+ { 1, 41, 74 },
+ { 1, 17, 31 }
+ }
+ }
+ }
+ }, { /* tx = 32x32 */
+ { /* block Type 0 */
+ { /* Intra */
+ { /* Coeff Band 0 */
+ { 17, 38, 140 },
+ { 7, 34, 80 },
+ { 1, 17, 29 }
+ }, { /* Coeff Band 1 */
+ { 37, 75, 128 },
+ { 41, 76, 128 },
+ { 26, 66, 116 },
+ { 12, 52, 94 },
+ { 2, 32, 55 },
+ { 1, 10, 16 }
+ }, { /* Coeff Band 2 */
+ { 50, 127, 154 },
+ { 37, 109, 152 },
+ { 16, 82, 121 },
+ { 5, 59, 85 },
+ { 1, 35, 54 },
+ { 1, 13, 20 }
+ }, { /* Coeff Band 3 */
+ { 40, 142, 167 },
+ { 17, 110, 157 },
+ { 2, 71, 112 },
+ { 1, 44, 72 },
+ { 1, 27, 45 },
+ { 1, 11, 17 }
+ }, { /* Coeff Band 4 */
+ { 30, 175, 188 },
+ { 9, 124, 169 },
+ { 1, 74, 116 },
+ { 1, 48, 78 },
+ { 1, 30, 49 },
+ { 1, 11, 18 }
+ }, { /* Coeff Band 5 */
+ { 10, 222, 223 },
+ { 2, 150, 194 },
+ { 1, 83, 128 },
+ { 1, 48, 79 },
+ { 1, 27, 45 },
+ { 1, 11, 17 }
+ }
+ }, { /* Inter */
+ { /* Coeff Band 0 */
+ { 36, 41, 235 },
+ { 29, 36, 193 },
+ { 10, 27, 111 }
+ }, { /* Coeff Band 1 */
+ { 85, 165, 222 },
+ { 177, 162, 215 },
+ { 110, 135, 195 },
+ { 57, 113, 168 },
+ { 23, 83, 120 },
+ { 10, 49, 61 }
+ }, { /* Coeff Band 2 */
+ { 85, 190, 223 },
+ { 36, 139, 200 },
+ { 5, 90, 146 },
+ { 1, 60, 103 },
+ { 1, 38, 65 },
+ { 1, 18, 30 }
+ }, { /* Coeff Band 3 */
+ { 72, 202, 223 },
+ { 23, 141, 199 },
+ { 2, 86, 140 },
+ { 1, 56, 97 },
+ { 1, 36, 61 },
+ { 1, 16, 27 }
+ }, { /* Coeff Band 4 */
+ { 55, 218, 225 },
+ { 13, 145, 200 },
+ { 1, 86, 141 },
+ { 1, 57, 99 },
+ { 1, 35, 61 },
+ { 1, 13, 22 }
+ }, { /* Coeff Band 5 */
+ { 15, 235, 212 },
+ { 1, 132, 184 },
+ { 1, 84, 139 },
+ { 1, 57, 97 },
+ { 1, 34, 56 },
+ { 1, 14, 23 }
+ }
+ }
+ }, { /* block Type 1 */
+ { /* Intra */
+ { /* Coeff Band 0 */
+ { 181, 21, 201 },
+ { 61, 37, 123 },
+ { 10, 38, 71 }
+ }, { /* Coeff Band 1 */
+ { 47, 106, 172 },
+ { 95, 104, 173 },
+ { 42, 93, 159 },
+ { 18, 77, 131 },
+ { 4, 50, 81 },
+ { 1, 17, 23 }
+ }, { /* Coeff Band 2 */
+ { 62, 147, 199 },
+ { 44, 130, 189 },
+ { 28, 102, 154 },
+ { 18, 75, 115 },
+ { 2, 44, 65 },
+ { 1, 12, 19 }
+ }, { /* Coeff Band 3 */
+ { 55, 153, 210 },
+ { 24, 130, 194 },
+ { 3, 93, 146 },
+ { 1, 61, 97 },
+ { 1, 31, 50 },
+ { 1, 10, 16 }
+ }, { /* Coeff Band 4 */
+ { 49, 186, 223 },
+ { 17, 148, 204 },
+ { 1, 96, 142 },
+ { 1, 53, 83 },
+ { 1, 26, 44 },
+ { 1, 11, 17 }
+ }, { /* Coeff Band 5 */
+ { 13, 217, 212 },
+ { 2, 136, 180 },
+ { 1, 78, 124 },
+ { 1, 50, 83 },
+ { 1, 29, 49 },
+ { 1, 14, 23 }
+ }
+ }, { /* Inter */
+ { /* Coeff Band 0 */
+ { 197, 13, 247 },
+ { 82, 17, 222 },
+ { 25, 17, 162 }
+ }, { /* Coeff Band 1 */
+ { 126, 186, 247 },
+ { 234, 191, 243 },
+ { 176, 177, 234 },
+ { 104, 158, 220 },
+ { 66, 128, 186 },
+ { 55, 90, 137 }
+ }, { /* Coeff Band 2 */
+ { 111, 197, 242 },
+ { 46, 158, 219 },
+ { 9, 104, 171 },
+ { 2, 65, 125 },
+ { 1, 44, 80 },
+ { 1, 17, 91 }
+ }, { /* Coeff Band 3 */
+ { 104, 208, 245 },
+ { 39, 168, 224 },
+ { 3, 109, 162 },
+ { 1, 79, 124 },
+ { 1, 50, 102 },
+ { 1, 43, 102 }
+ }, { /* Coeff Band 4 */
+ { 84, 220, 246 },
+ { 31, 177, 231 },
+ { 2, 115, 180 },
+ { 1, 79, 134 },
+ { 1, 55, 77 },
+ { 1, 60, 79 }
+ }, { /* Coeff Band 5 */
+ { 43, 243, 240 },
+ { 8, 180, 217 },
+ { 1, 115, 166 },
+ { 1, 84, 121 },
+ { 1, 51, 67 },
+ { 1, 16, 6 }
+ }
+ }
+ }
+ }
+};
+
+enum MVJoint {
+ MV_JOINT_ZERO,
+ MV_JOINT_H,
+ MV_JOINT_V,
+ MV_JOINT_HV,
+};
+
+static const int8_t vp9_mv_joint_tree[3][2] = {
+ { -MV_JOINT_ZERO, 1 }, // '0'
+ { -MV_JOINT_H, 2 }, // '10'
+ { -MV_JOINT_V, -MV_JOINT_HV }, // '11x'
+};
+
+static const int8_t vp9_mv_class_tree[10][2] = {
+ { -0, 1 }, // '0'
+ { -1, 2 }, // '10'
+ { 3, 4 },
+ { -2, -3 }, // '110x'
+ { 5, 6 },
+ { -4, -5 }, // '1110x'
+ { -6, 7 }, // '11110'
+ { 8, 9 },
+ { -7, -8 }, // '111110x'
+ { -9, -10 }, // '111111x'
+};
+
+static const int8_t vp9_mv_fp_tree[3][2] = {
+ { -0, 1 }, // '0'
+ { -1, 2 }, // '10'
+ { -2, -3 }, // '11x'
+};
+
+#endif /* AVCODEC_VP9DATA_H */
diff --git a/media/ffvpx/libavcodec/vp9dsp.c b/media/ffvpx/libavcodec/vp9dsp.c
new file mode 100644
index 000000000..54e77e267
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp9dsp.c
@@ -0,0 +1,41 @@
+/*
+ * VP9 compatible video decoder
+ *
+ * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+ * Copyright (C) 2013 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/common.h"
+#include "vp9dsp.h"
+
+av_cold void ff_vp9dsp_init(VP9DSPContext *dsp, int bpp, int bitexact)
+{
+ if (bpp == 8) {
+ ff_vp9dsp_init_8(dsp);
+ } else if (bpp == 10) {
+ ff_vp9dsp_init_10(dsp);
+ } else {
+ av_assert0(bpp == 12);
+ ff_vp9dsp_init_12(dsp);
+ }
+
+ if (ARCH_X86) ff_vp9dsp_init_x86(dsp, bpp, bitexact);
+ if (ARCH_MIPS) ff_vp9dsp_init_mips(dsp, bpp);
+}
diff --git a/media/ffvpx/libavcodec/vp9dsp.h b/media/ffvpx/libavcodec/vp9dsp.h
new file mode 100644
index 000000000..733f5bfc4
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp9dsp.h
@@ -0,0 +1,132 @@
+/*
+ * VP9 compatible video decoder
+ *
+ * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+ * Copyright (C) 2013 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VP9DSP_H
+#define AVCODEC_VP9DSP_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "vp9.h"
+
+typedef void (*vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride,
+ const uint8_t *ref, ptrdiff_t ref_stride,
+ int h, int mx, int my);
+typedef void (*vp9_scaled_mc_func)(uint8_t *dst, ptrdiff_t dst_stride,
+ const uint8_t *ref, ptrdiff_t ref_stride,
+ int h, int mx, int my, int dx, int dy);
+
+typedef struct VP9DSPContext {
+ /*
+ * dimension 1: 0=4x4, 1=8x8, 2=16x16, 3=32x32
+ * dimension 2: intra prediction modes
+ *
+ * dst/left/top is aligned by transform-size (i.e. 4, 8, 16 or 32 pixels)
+ * stride is aligned by 16 pixels
+ * top[-1] is top/left; top[4,7] is top-right for 4x4
+ */
+ // FIXME(rbultje) maybe replace left/top pointers with HAVE_TOP/
+ // HAVE_LEFT/HAVE_TOPRIGHT flags instead, and then handle it in-place?
+ // also needs to fit in with what H.264/VP8/etc do
+ void (*intra_pred[N_TXFM_SIZES][N_INTRA_PRED_MODES])(uint8_t *dst,
+ ptrdiff_t stride,
+ const uint8_t *left,
+ const uint8_t *top);
+
+ /*
+ * dimension 1: 0=4x4, 1=8x8, 2=16x16, 3=32x32, 4=lossless (3-4=dct only)
+ * dimension 2: 0=dct/dct, 1=dct/adst, 2=adst/dct, 3=adst/adst
+ *
+ * dst is aligned by transform-size (i.e. 4, 8, 16 or 32 pixels)
+ * stride is aligned by 16 pixels
+ * block is 16-byte aligned
+ * eob indicates the position (+1) of the last non-zero coefficient,
+ * in scan-order. This can be used to write faster versions, e.g. a
+ * dc-only 4x4/8x8/16x16/32x32, or a 4x4-only (eob<10) 8x8/16x16/32x32,
+ * etc.
+ */
+ // FIXME also write idct_add_block() versions for whole (inter) pred
+ // blocks, so we can do 2 4x4s at once
+ void (*itxfm_add[N_TXFM_SIZES + 1][N_TXFM_TYPES])(uint8_t *dst,
+ ptrdiff_t stride,
+ int16_t *block, int eob);
+
+ /*
+ * dimension 1: width of filter (0=4, 1=8, 2=16)
+ * dimension 2: 0=col-edge filter (h), 1=row-edge filter (v)
+ *
+ * dst/stride are aligned by 8
+ */
+ void (*loop_filter_8[3][2])(uint8_t *dst, ptrdiff_t stride,
+ int mb_lim, int lim, int hev_thr);
+
+ /*
+ * dimension 1: 0=col-edge filter (h), 1=row-edge filter (v)
+ *
+ * The width of filter is assumed to be 16; dst/stride are aligned by 16
+ */
+ void (*loop_filter_16[2])(uint8_t *dst, ptrdiff_t stride,
+ int mb_lim, int lim, int hev_thr);
+
+ /*
+ * dimension 1/2: width of filter (0=4, 1=8) for each filter half
+ * dimension 3: 0=col-edge filter (h), 1=row-edge filter (v)
+ *
+ * dst/stride are aligned by operation size
+ * this basically calls loop_filter[d1][d3][0](), followed by
+ * loop_filter[d2][d3][0]() on the next 8 pixels
+ * mb_lim/lim/hev_thr contain two values in the lowest two bytes of the
+ * integer.
+ */
+ // FIXME perhaps a mix4 that operates on 32px (for AVX2)
+ void (*loop_filter_mix2[2][2][2])(uint8_t *dst, ptrdiff_t stride,
+ int mb_lim, int lim, int hev_thr);
+
+ /*
+ * dimension 1: hsize (0: 64, 1: 32, 2: 16, 3: 8, 4: 4)
+ * dimension 2: filter type (0: smooth, 1: regular, 2: sharp, 3: bilin)
+ * dimension 3: averaging type (0: put, 1: avg)
+ * dimension 4: x subpel interpolation (0: none, 1: 8tap/bilin)
+ * dimension 5: y subpel interpolation (0: none, 1: 8tap/bilin)
+ *
+ * dst/stride are aligned by hsize
+ */
+ vp9_mc_func mc[5][4][2][2][2];
+
+ /*
+ * for scalable MC, first 3 dimensions identical to above, the other two
+ * don't exist since it changes per stepsize.
+ */
+ vp9_scaled_mc_func smc[5][4][2];
+} VP9DSPContext;
+
+void ff_vp9dsp_init(VP9DSPContext *dsp, int bpp, int bitexact);
+
+void ff_vp9dsp_init_8(VP9DSPContext *dsp);
+void ff_vp9dsp_init_10(VP9DSPContext *dsp);
+void ff_vp9dsp_init_12(VP9DSPContext *dsp);
+
+void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact);
+void ff_vp9dsp_init_mips(VP9DSPContext *dsp, int bpp);
+
+#endif /* AVCODEC_VP9DSP_H */
diff --git a/media/ffvpx/libavcodec/vp9dsp_10bpp.c b/media/ffvpx/libavcodec/vp9dsp_10bpp.c
new file mode 100644
index 000000000..62ce18207
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp9dsp_10bpp.c
@@ -0,0 +1,26 @@
+/*
+ * VP9 compatible video decoder
+ *
+ * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+ * Copyright (C) 2013 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define BIT_DEPTH 10
+#define dctint int64_t
+#include "vp9dsp_template.c"
diff --git a/media/ffvpx/libavcodec/vp9dsp_12bpp.c b/media/ffvpx/libavcodec/vp9dsp_12bpp.c
new file mode 100644
index 000000000..2f36471c5
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp9dsp_12bpp.c
@@ -0,0 +1,26 @@
+/*
+ * VP9 compatible video decoder
+ *
+ * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+ * Copyright (C) 2013 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define BIT_DEPTH 12
+#define dctint int64_t
+#include "vp9dsp_template.c"
diff --git a/media/ffvpx/libavcodec/vp9dsp_8bpp.c b/media/ffvpx/libavcodec/vp9dsp_8bpp.c
new file mode 100644
index 000000000..4b219b06b
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp9dsp_8bpp.c
@@ -0,0 +1,26 @@
+/*
+ * VP9 compatible video decoder
+ *
+ * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+ * Copyright (C) 2013 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define BIT_DEPTH 8
+#define dctint int
+#include "vp9dsp_template.c"
diff --git a/media/ffvpx/libavcodec/vp9dsp_template.c b/media/ffvpx/libavcodec/vp9dsp_template.c
new file mode 100644
index 000000000..4d810fec3
--- /dev/null
+++ b/media/ffvpx/libavcodec/vp9dsp_template.c
@@ -0,0 +1,2601 @@
+/*
+ * VP9 compatible video decoder
+ *
+ * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+ * Copyright (C) 2013 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/common.h"
+#include "bit_depth_template.c"
+#include "vp9dsp.h"
+
+#if BIT_DEPTH != 12
+
+// FIXME see whether we can merge parts of this (perhaps at least 4x4 and 8x8)
+// back with h264pred.[ch]
+
+static void vert_4x4_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *left, const uint8_t *_top)
+{
+ pixel *dst = (pixel *) _dst;
+ const pixel *top = (const pixel *) _top;
+ pixel4 p4 = AV_RN4PA(top);
+
+ stride /= sizeof(pixel);
+ AV_WN4PA(dst + stride * 0, p4);
+ AV_WN4PA(dst + stride * 1, p4);
+ AV_WN4PA(dst + stride * 2, p4);
+ AV_WN4PA(dst + stride * 3, p4);
+}
+
+static void vert_8x8_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *left, const uint8_t *_top)
+{
+ pixel *dst = (pixel *) _dst;
+ const pixel *top = (const pixel *) _top;
+ pixel4 p4a = AV_RN4PA(top + 0);
+ pixel4 p4b = AV_RN4PA(top + 4);
+ int y;
+
+ stride /= sizeof(pixel);
+ for (y = 0; y < 8; y++) {
+ AV_WN4PA(dst + 0, p4a);
+ AV_WN4PA(dst + 4, p4b);
+ dst += stride;
+ }
+}
+
+static void vert_16x16_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *left, const uint8_t *_top)
+{
+ pixel *dst = (pixel *) _dst;
+ const pixel *top = (const pixel *) _top;
+ pixel4 p4a = AV_RN4PA(top + 0);
+ pixel4 p4b = AV_RN4PA(top + 4);
+ pixel4 p4c = AV_RN4PA(top + 8);
+ pixel4 p4d = AV_RN4PA(top + 12);
+ int y;
+
+ stride /= sizeof(pixel);
+ for (y = 0; y < 16; y++) {
+ AV_WN4PA(dst + 0, p4a);
+ AV_WN4PA(dst + 4, p4b);
+ AV_WN4PA(dst + 8, p4c);
+ AV_WN4PA(dst + 12, p4d);
+ dst += stride;
+ }
+}
+
+static void vert_32x32_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *left, const uint8_t *_top)
+{
+ pixel *dst = (pixel *) _dst;
+ const pixel *top = (const pixel *) _top;
+ pixel4 p4a = AV_RN4PA(top + 0);
+ pixel4 p4b = AV_RN4PA(top + 4);
+ pixel4 p4c = AV_RN4PA(top + 8);
+ pixel4 p4d = AV_RN4PA(top + 12);
+ pixel4 p4e = AV_RN4PA(top + 16);
+ pixel4 p4f = AV_RN4PA(top + 20);
+ pixel4 p4g = AV_RN4PA(top + 24);
+ pixel4 p4h = AV_RN4PA(top + 28);
+ int y;
+
+ stride /= sizeof(pixel);
+ for (y = 0; y < 32; y++) {
+ AV_WN4PA(dst + 0, p4a);
+ AV_WN4PA(dst + 4, p4b);
+ AV_WN4PA(dst + 8, p4c);
+ AV_WN4PA(dst + 12, p4d);
+ AV_WN4PA(dst + 16, p4e);
+ AV_WN4PA(dst + 20, p4f);
+ AV_WN4PA(dst + 24, p4g);
+ AV_WN4PA(dst + 28, p4h);
+ dst += stride;
+ }
+}
+
+static void hor_4x4_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *_left, const uint8_t *top)
+{
+ pixel *dst = (pixel *) _dst;
+ const pixel *left = (const pixel *) _left;
+
+ stride /= sizeof(pixel);
+ AV_WN4PA(dst + stride * 0, PIXEL_SPLAT_X4(left[3]));
+ AV_WN4PA(dst + stride * 1, PIXEL_SPLAT_X4(left[2]));
+ AV_WN4PA(dst + stride * 2, PIXEL_SPLAT_X4(left[1]));
+ AV_WN4PA(dst + stride * 3, PIXEL_SPLAT_X4(left[0]));
+}
+
+static void hor_8x8_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *_left, const uint8_t *top)
+{
+ pixel *dst = (pixel *) _dst;
+ const pixel *left = (const pixel *) _left;
+ int y;
+
+ stride /= sizeof(pixel);
+ for (y = 0; y < 8; y++) {
+ pixel4 p4 = PIXEL_SPLAT_X4(left[7 - y]);
+
+ AV_WN4PA(dst + 0, p4);
+ AV_WN4PA(dst + 4, p4);
+ dst += stride;
+ }
+}
+
+static void hor_16x16_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *_left, const uint8_t *top)
+{
+ pixel *dst = (pixel *) _dst;
+ const pixel *left = (const pixel *) _left;
+ int y;
+
+ stride /= sizeof(pixel);
+ for (y = 0; y < 16; y++) {
+ pixel4 p4 = PIXEL_SPLAT_X4(left[15 - y]);
+
+ AV_WN4PA(dst + 0, p4);
+ AV_WN4PA(dst + 4, p4);
+ AV_WN4PA(dst + 8, p4);
+ AV_WN4PA(dst + 12, p4);
+ dst += stride;
+ }
+}
+
+static void hor_32x32_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *_left, const uint8_t *top)
+{
+ pixel *dst = (pixel *) _dst;
+ const pixel *left = (const pixel *) _left;
+ int y;
+
+ stride /= sizeof(pixel);
+ for (y = 0; y < 32; y++) {
+ pixel4 p4 = PIXEL_SPLAT_X4(left[31 - y]);
+
+ AV_WN4PA(dst + 0, p4);
+ AV_WN4PA(dst + 4, p4);
+ AV_WN4PA(dst + 8, p4);
+ AV_WN4PA(dst + 12, p4);
+ AV_WN4PA(dst + 16, p4);
+ AV_WN4PA(dst + 20, p4);
+ AV_WN4PA(dst + 24, p4);
+ AV_WN4PA(dst + 28, p4);
+ dst += stride;
+ }
+}
+
+#endif /* BIT_DEPTH != 12 */
+
+static void tm_4x4_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *_left, const uint8_t *_top)
+{
+ pixel *dst = (pixel *) _dst;
+ const pixel *left = (const pixel *) _left;
+ const pixel *top = (const pixel *) _top;
+ int y, tl = top[-1];
+
+ stride /= sizeof(pixel);
+ for (y = 0; y < 4; y++) {
+ int l_m_tl = left[3 - y] - tl;
+
+ dst[0] = av_clip_pixel(top[0] + l_m_tl);
+ dst[1] = av_clip_pixel(top[1] + l_m_tl);
+ dst[2] = av_clip_pixel(top[2] + l_m_tl);
+ dst[3] = av_clip_pixel(top[3] + l_m_tl);
+ dst += stride;
+ }
+}
+
+static void tm_8x8_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *_left, const uint8_t *_top)
+{
+ pixel *dst = (pixel *) _dst;
+ const pixel *left = (const pixel *) _left;
+ const pixel *top = (const pixel *) _top;
+ int y, tl = top[-1];
+
+ stride /= sizeof(pixel);
+ for (y = 0; y < 8; y++) {
+ int l_m_tl = left[7 - y] - tl;
+
+ dst[0] = av_clip_pixel(top[0] + l_m_tl);
+ dst[1] = av_clip_pixel(top[1] + l_m_tl);
+ dst[2] = av_clip_pixel(top[2] + l_m_tl);
+ dst[3] = av_clip_pixel(top[3] + l_m_tl);
+ dst[4] = av_clip_pixel(top[4] + l_m_tl);
+ dst[5] = av_clip_pixel(top[5] + l_m_tl);
+ dst[6] = av_clip_pixel(top[6] + l_m_tl);
+ dst[7] = av_clip_pixel(top[7] + l_m_tl);
+ dst += stride;
+ }
+}
+
+static void tm_16x16_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *_left, const uint8_t *_top)
+{
+ pixel *dst = (pixel *) _dst;
+ const pixel *left = (const pixel *) _left;
+ const pixel *top = (const pixel *) _top;
+ int y, tl = top[-1];
+
+ stride /= sizeof(pixel);
+ for (y = 0; y < 16; y++) {
+ int l_m_tl = left[15 - y] - tl;
+
+ dst[ 0] = av_clip_pixel(top[ 0] + l_m_tl);
+ dst[ 1] = av_clip_pixel(top[ 1] + l_m_tl);
+ dst[ 2] = av_clip_pixel(top[ 2] + l_m_tl);
+ dst[ 3] = av_clip_pixel(top[ 3] + l_m_tl);
+ dst[ 4] = av_clip_pixel(top[ 4] + l_m_tl);
+ dst[ 5] = av_clip_pixel(top[ 5] + l_m_tl);
+ dst[ 6] = av_clip_pixel(top[ 6] + l_m_tl);
+ dst[ 7] = av_clip_pixel(top[ 7] + l_m_tl);
+ dst[ 8] = av_clip_pixel(top[ 8] + l_m_tl);
+ dst[ 9] = av_clip_pixel(top[ 9] + l_m_tl);
+ dst[10] = av_clip_pixel(top[10] + l_m_tl);
+ dst[11] = av_clip_pixel(top[11] + l_m_tl);
+ dst[12] = av_clip_pixel(top[12] + l_m_tl);
+ dst[13] = av_clip_pixel(top[13] + l_m_tl);
+ dst[14] = av_clip_pixel(top[14] + l_m_tl);
+ dst[15] = av_clip_pixel(top[15] + l_m_tl);
+ dst += stride;
+ }
+}
+
+static void tm_32x32_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *_left, const uint8_t *_top)
+{
+ pixel *dst = (pixel *) _dst;
+ const pixel *left = (const pixel *) _left;
+ const pixel *top = (const pixel *) _top;
+ int y, tl = top[-1];
+
+ stride /= sizeof(pixel);
+ for (y = 0; y < 32; y++) {
+ int l_m_tl = left[31 - y] - tl;
+
+ dst[ 0] = av_clip_pixel(top[ 0] + l_m_tl);
+ dst[ 1] = av_clip_pixel(top[ 1] + l_m_tl);
+ dst[ 2] = av_clip_pixel(top[ 2] + l_m_tl);
+ dst[ 3] = av_clip_pixel(top[ 3] + l_m_tl);
+ dst[ 4] = av_clip_pixel(top[ 4] + l_m_tl);
+ dst[ 5] = av_clip_pixel(top[ 5] + l_m_tl);
+ dst[ 6] = av_clip_pixel(top[ 6] + l_m_tl);
+ dst[ 7] = av_clip_pixel(top[ 7] + l_m_tl);
+ dst[ 8] = av_clip_pixel(top[ 8] + l_m_tl);
+ dst[ 9] = av_clip_pixel(top[ 9] + l_m_tl);
+ dst[10] = av_clip_pixel(top[10] + l_m_tl);
+ dst[11] = av_clip_pixel(top[11] + l_m_tl);
+ dst[12] = av_clip_pixel(top[12] + l_m_tl);
+ dst[13] = av_clip_pixel(top[13] + l_m_tl);
+ dst[14] = av_clip_pixel(top[14] + l_m_tl);
+ dst[15] = av_clip_pixel(top[15] + l_m_tl);
+ dst[16] = av_clip_pixel(top[16] + l_m_tl);
+ dst[17] = av_clip_pixel(top[17] + l_m_tl);
+ dst[18] = av_clip_pixel(top[18] + l_m_tl);
+ dst[19] = av_clip_pixel(top[19] + l_m_tl);
+ dst[20] = av_clip_pixel(top[20] + l_m_tl);
+ dst[21] = av_clip_pixel(top[21] + l_m_tl);
+ dst[22] = av_clip_pixel(top[22] + l_m_tl);
+ dst[23] = av_clip_pixel(top[23] + l_m_tl);
+ dst[24] = av_clip_pixel(top[24] + l_m_tl);
+ dst[25] = av_clip_pixel(top[25] + l_m_tl);
+ dst[26] = av_clip_pixel(top[26] + l_m_tl);
+ dst[27] = av_clip_pixel(top[27] + l_m_tl);
+ dst[28] = av_clip_pixel(top[28] + l_m_tl);
+ dst[29] = av_clip_pixel(top[29] + l_m_tl);
+ dst[30] = av_clip_pixel(top[30] + l_m_tl);
+ dst[31] = av_clip_pixel(top[31] + l_m_tl);
+ dst += stride;
+ }
+}
+
+#if BIT_DEPTH != 12
+
+static void dc_4x4_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *_left, const uint8_t *_top)
+{
+ pixel *dst = (pixel *) _dst;
+ const pixel *left = (const pixel *) _left;
+ const pixel *top = (const pixel *) _top;
+ pixel4 dc = PIXEL_SPLAT_X4((left[0] + left[1] + left[2] + left[3] +
+ top[0] + top[1] + top[2] + top[3] + 4) >> 3);
+
+ stride /= sizeof(pixel);
+ AV_WN4PA(dst + stride * 0, dc);
+ AV_WN4PA(dst + stride * 1, dc);
+ AV_WN4PA(dst + stride * 2, dc);
+ AV_WN4PA(dst + stride * 3, dc);
+}
+
+static void dc_8x8_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *_left, const uint8_t *_top)
+{
+ pixel *dst = (pixel *) _dst;
+ const pixel *left = (const pixel *) _left;
+ const pixel *top = (const pixel *) _top;
+ pixel4 dc = PIXEL_SPLAT_X4
+ ((left[0] + left[1] + left[2] + left[3] + left[4] + left[5] +
+ left[6] + left[7] + top[0] + top[1] + top[2] + top[3] +
+ top[4] + top[5] + top[6] + top[7] + 8) >> 4);
+ int y;
+
+ stride /= sizeof(pixel);
+ for (y = 0; y < 8; y++) {
+ AV_WN4PA(dst + 0, dc);
+ AV_WN4PA(dst + 4, dc);
+ dst += stride;
+ }
+}
+
+static void dc_16x16_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *_left, const uint8_t *_top)
+{
+ pixel *dst = (pixel *) _dst;
+ const pixel *left = (const pixel *) _left;
+ const pixel *top = (const pixel *) _top;
+ pixel4 dc = PIXEL_SPLAT_X4
+ ((left[0] + left[1] + left[2] + left[3] + left[4] + left[5] + left[6] +
+ left[7] + left[8] + left[9] + left[10] + left[11] + left[12] +
+ left[13] + left[14] + left[15] + top[0] + top[1] + top[2] + top[3] +
+ top[4] + top[5] + top[6] + top[7] + top[8] + top[9] + top[10] +
+ top[11] + top[12] + top[13] + top[14] + top[15] + 16) >> 5);
+ int y;
+
+ stride /= sizeof(pixel);
+ for (y = 0; y < 16; y++) {
+ AV_WN4PA(dst + 0, dc);
+ AV_WN4PA(dst + 4, dc);
+ AV_WN4PA(dst + 8, dc);
+ AV_WN4PA(dst + 12, dc);
+ dst += stride;
+ }
+}
+
+static void dc_32x32_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *_left, const uint8_t *_top)
+{
+ pixel *dst = (pixel *) _dst;
+ const pixel *left = (const pixel *) _left;
+ const pixel *top = (const pixel *) _top;
+ pixel4 dc = PIXEL_SPLAT_X4
+ ((left[0] + left[1] + left[2] + left[3] + left[4] + left[5] + left[6] +
+ left[7] + left[8] + left[9] + left[10] + left[11] + left[12] +
+ left[13] + left[14] + left[15] + left[16] + left[17] + left[18] +
+ left[19] + left[20] + left[21] + left[22] + left[23] + left[24] +
+ left[25] + left[26] + left[27] + left[28] + left[29] + left[30] +
+ left[31] + top[0] + top[1] + top[2] + top[3] + top[4] + top[5] +
+ top[6] + top[7] + top[8] + top[9] + top[10] + top[11] + top[12] +
+ top[13] + top[14] + top[15] + top[16] + top[17] + top[18] + top[19] +
+ top[20] + top[21] + top[22] + top[23] + top[24] + top[25] + top[26] +
+ top[27] + top[28] + top[29] + top[30] + top[31] + 32) >> 6);
+ int y;
+
+ stride /= sizeof(pixel);
+ for (y = 0; y < 32; y++) {
+ AV_WN4PA(dst + 0, dc);
+ AV_WN4PA(dst + 4, dc);
+ AV_WN4PA(dst + 8, dc);
+ AV_WN4PA(dst + 12, dc);
+ AV_WN4PA(dst + 16, dc);
+ AV_WN4PA(dst + 20, dc);
+ AV_WN4PA(dst + 24, dc);
+ AV_WN4PA(dst + 28, dc);
+ dst += stride;
+ }
+}
+
+static void dc_left_4x4_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *_left, const uint8_t *top)
+{
+ pixel *dst = (pixel *) _dst;
+ const pixel *left = (const pixel *) _left;
+ pixel4 dc = PIXEL_SPLAT_X4((left[0] + left[1] + left[2] + left[3] + 2) >> 2);
+
+ stride /= sizeof(pixel);
+ AV_WN4PA(dst + stride * 0, dc);
+ AV_WN4PA(dst + stride * 1, dc);
+ AV_WN4PA(dst + stride * 2, dc);
+ AV_WN4PA(dst + stride * 3, dc);
+}
+
+static void dc_left_8x8_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *_left, const uint8_t *top)
+{
+ pixel *dst = (pixel *) _dst;
+ const pixel *left = (const pixel *) _left;
+ pixel4 dc = PIXEL_SPLAT_X4
+ ((left[0] + left[1] + left[2] + left[3] +
+ left[4] + left[5] + left[6] + left[7] + 4) >> 3);
+ int y;
+
+ stride /= sizeof(pixel);
+ for (y = 0; y < 8; y++) {
+ AV_WN4PA(dst + 0, dc);
+ AV_WN4PA(dst + 4, dc);
+ dst += stride;
+ }
+}
+
+static void dc_left_16x16_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *_left, const uint8_t *top)
+{
+ pixel *dst = (pixel *) _dst;
+ const pixel *left = (const pixel *) _left;
+ pixel4 dc = PIXEL_SPLAT_X4
+ ((left[0] + left[1] + left[2] + left[3] + left[4] + left[5] +
+ left[6] + left[7] + left[8] + left[9] + left[10] + left[11] +
+ left[12] + left[13] + left[14] + left[15] + 8) >> 4);
+ int y;
+
+ stride /= sizeof(pixel);
+ for (y = 0; y < 16; y++) {
+ AV_WN4PA(dst + 0, dc);
+ AV_WN4PA(dst + 4, dc);
+ AV_WN4PA(dst + 8, dc);
+ AV_WN4PA(dst + 12, dc);
+ dst += stride;
+ }
+}
+
+static void dc_left_32x32_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *_left, const uint8_t *top)
+{
+ pixel *dst = (pixel *) _dst;
+ const pixel *left = (const pixel *) _left;
+ pixel4 dc = PIXEL_SPLAT_X4
+ ((left[0] + left[1] + left[2] + left[3] + left[4] + left[5] +
+ left[6] + left[7] + left[8] + left[9] + left[10] + left[11] +
+ left[12] + left[13] + left[14] + left[15] + left[16] + left[17] +
+ left[18] + left[19] + left[20] + left[21] + left[22] + left[23] +
+ left[24] + left[25] + left[26] + left[27] + left[28] + left[29] +
+ left[30] + left[31] + 16) >> 5);
+ int y;
+
+ stride /= sizeof(pixel);
+ for (y = 0; y < 32; y++) {
+ AV_WN4PA(dst + 0, dc);
+ AV_WN4PA(dst + 4, dc);
+ AV_WN4PA(dst + 8, dc);
+ AV_WN4PA(dst + 12, dc);
+ AV_WN4PA(dst + 16, dc);
+ AV_WN4PA(dst + 20, dc);
+ AV_WN4PA(dst + 24, dc);
+ AV_WN4PA(dst + 28, dc);
+ dst += stride;
+ }
+}
+
+static void dc_top_4x4_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *left, const uint8_t *_top)
+{
+ pixel *dst = (pixel *) _dst;
+ const pixel *top = (const pixel *) _top;
+ pixel4 dc = PIXEL_SPLAT_X4((top[0] + top[1] + top[2] + top[3] + 2) >> 2);
+
+ stride /= sizeof(pixel);
+ AV_WN4PA(dst + stride * 0, dc);
+ AV_WN4PA(dst + stride * 1, dc);
+ AV_WN4PA(dst + stride * 2, dc);
+ AV_WN4PA(dst + stride * 3, dc);
+}
+
+static void dc_top_8x8_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *left, const uint8_t *_top)
+{
+ pixel *dst = (pixel *) _dst;
+ const pixel *top = (const pixel *) _top;
+ pixel4 dc = PIXEL_SPLAT_X4
+ ((top[0] + top[1] + top[2] + top[3] +
+ top[4] + top[5] + top[6] + top[7] + 4) >> 3);
+ int y;
+
+ stride /= sizeof(pixel);
+ for (y = 0; y < 8; y++) {
+ AV_WN4PA(dst + 0, dc);
+ AV_WN4PA(dst + 4, dc);
+ dst += stride;
+ }
+}
+
+static void dc_top_16x16_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *left, const uint8_t *_top)
+{
+ pixel *dst = (pixel *) _dst;
+ const pixel *top = (const pixel *) _top;
+ pixel4 dc = PIXEL_SPLAT_X4
+ ((top[0] + top[1] + top[2] + top[3] + top[4] + top[5] +
+ top[6] + top[7] + top[8] + top[9] + top[10] + top[11] +
+ top[12] + top[13] + top[14] + top[15] + 8) >> 4);
+ int y;
+
+ stride /= sizeof(pixel);
+ for (y = 0; y < 16; y++) {
+ AV_WN4PA(dst + 0, dc);
+ AV_WN4PA(dst + 4, dc);
+ AV_WN4PA(dst + 8, dc);
+ AV_WN4PA(dst + 12, dc);
+ dst += stride;
+ }
+}
+
+static void dc_top_32x32_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *left, const uint8_t *_top)
+{
+ pixel *dst = (pixel *) _dst;
+ const pixel *top = (const pixel *) _top;
+ pixel4 dc = PIXEL_SPLAT_X4
+ ((top[0] + top[1] + top[2] + top[3] + top[4] + top[5] +
+ top[6] + top[7] + top[8] + top[9] + top[10] + top[11] +
+ top[12] + top[13] + top[14] + top[15] + top[16] + top[17] +
+ top[18] + top[19] + top[20] + top[21] + top[22] + top[23] +
+ top[24] + top[25] + top[26] + top[27] + top[28] + top[29] +
+ top[30] + top[31] + 16) >> 5);
+ int y;
+
+ stride /= sizeof(pixel);
+ for (y = 0; y < 32; y++) {
+ AV_WN4PA(dst + 0, dc);
+ AV_WN4PA(dst + 4, dc);
+ AV_WN4PA(dst + 8, dc);
+ AV_WN4PA(dst + 12, dc);
+ AV_WN4PA(dst + 16, dc);
+ AV_WN4PA(dst + 20, dc);
+ AV_WN4PA(dst + 24, dc);
+ AV_WN4PA(dst + 28, dc);
+ dst += stride;
+ }
+}
+
+#endif /* BIT_DEPTH != 12 */
+
+static void dc_128_4x4_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *left, const uint8_t *top)
+{
+ pixel *dst = (pixel *) _dst;
+ pixel4 val = PIXEL_SPLAT_X4(128 << (BIT_DEPTH - 8));
+
+ stride /= sizeof(pixel);
+ AV_WN4PA(dst + stride * 0, val);
+ AV_WN4PA(dst + stride * 1, val);
+ AV_WN4PA(dst + stride * 2, val);
+ AV_WN4PA(dst + stride * 3, val);
+}
+
+static void dc_128_8x8_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *left, const uint8_t *top)
+{
+ pixel *dst = (pixel *) _dst;
+ pixel4 val = PIXEL_SPLAT_X4(128 << (BIT_DEPTH - 8));
+ int y;
+
+ stride /= sizeof(pixel);
+ for (y = 0; y < 8; y++) {
+ AV_WN4PA(dst + 0, val);
+ AV_WN4PA(dst + 4, val);
+ dst += stride;
+ }
+}
+
+static void dc_128_16x16_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *left, const uint8_t *top)
+{
+ pixel *dst = (pixel *) _dst;
+ pixel4 val = PIXEL_SPLAT_X4(128 << (BIT_DEPTH - 8));
+ int y;
+
+ stride /= sizeof(pixel);
+ for (y = 0; y < 16; y++) {
+ AV_WN4PA(dst + 0, val);
+ AV_WN4PA(dst + 4, val);
+ AV_WN4PA(dst + 8, val);
+ AV_WN4PA(dst + 12, val);
+ dst += stride;
+ }
+}
+
+static void dc_128_32x32_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *left, const uint8_t *top)
+{
+ pixel *dst = (pixel *) _dst;
+ pixel4 val = PIXEL_SPLAT_X4(128 << (BIT_DEPTH - 8));
+ int y;
+
+ stride /= sizeof(pixel);
+ for (y = 0; y < 32; y++) {
+ AV_WN4PA(dst + 0, val);
+ AV_WN4PA(dst + 4, val);
+ AV_WN4PA(dst + 8, val);
+ AV_WN4PA(dst + 12, val);
+ AV_WN4PA(dst + 16, val);
+ AV_WN4PA(dst + 20, val);
+ AV_WN4PA(dst + 24, val);
+ AV_WN4PA(dst + 28, val);
+ dst += stride;
+ }
+}
+
+static void dc_127_4x4_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *left, const uint8_t *top)
+{
+ pixel *dst = (pixel *) _dst;
+ pixel4 val = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) - 1);
+
+ stride /= sizeof(pixel);
+ AV_WN4PA(dst + stride * 0, val);
+ AV_WN4PA(dst + stride * 1, val);
+ AV_WN4PA(dst + stride * 2, val);
+ AV_WN4PA(dst + stride * 3, val);}
+
+static void dc_127_8x8_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *left, const uint8_t *top)
+{
+ pixel *dst = (pixel *) _dst;
+ pixel4 val = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) - 1);
+ int y;
+
+ stride /= sizeof(pixel);
+ for (y = 0; y < 8; y++) {
+ AV_WN4PA(dst + 0, val);
+ AV_WN4PA(dst + 4, val);
+ dst += stride;
+ }
+}
+
+static void dc_127_16x16_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *left, const uint8_t *top)
+{
+ pixel *dst = (pixel *) _dst;
+ pixel4 val = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) - 1);
+ int y;
+
+ stride /= sizeof(pixel);
+ for (y = 0; y < 16; y++) {
+ AV_WN4PA(dst + 0, val);
+ AV_WN4PA(dst + 4, val);
+ AV_WN4PA(dst + 8, val);
+ AV_WN4PA(dst + 12, val);
+ dst += stride;
+ }
+}
+
+static void dc_127_32x32_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *left, const uint8_t *top)
+{
+ pixel *dst = (pixel *) _dst;
+ pixel4 val = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) - 1);
+ int y;
+
+ stride /= sizeof(pixel);
+ for (y = 0; y < 32; y++) {
+ AV_WN4PA(dst + 0, val);
+ AV_WN4PA(dst + 4, val);
+ AV_WN4PA(dst + 8, val);
+ AV_WN4PA(dst + 12, val);
+ AV_WN4PA(dst + 16, val);
+ AV_WN4PA(dst + 20, val);
+ AV_WN4PA(dst + 24, val);
+ AV_WN4PA(dst + 28, val);
+ dst += stride;
+ }
+}
+
+static void dc_129_4x4_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *left, const uint8_t *top)
+{
+ pixel *dst = (pixel *) _dst;
+ pixel4 val = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) + 1);
+
+ stride /= sizeof(pixel);
+ AV_WN4PA(dst + stride * 0, val);
+ AV_WN4PA(dst + stride * 1, val);
+ AV_WN4PA(dst + stride * 2, val);
+ AV_WN4PA(dst + stride * 3, val);
+}
+
+static void dc_129_8x8_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *left, const uint8_t *top)
+{
+ pixel *dst = (pixel *) _dst;
+ pixel4 val = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) + 1);
+ int y;
+
+ stride /= sizeof(pixel);
+ for (y = 0; y < 8; y++) {
+ AV_WN4PA(dst + 0, val);
+ AV_WN4PA(dst + 4, val);
+ dst += stride;
+ }
+}
+
+static void dc_129_16x16_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *left, const uint8_t *top)
+{
+ pixel *dst = (pixel *) _dst;
+ pixel4 val = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) + 1);
+ int y;
+
+ stride /= sizeof(pixel);
+ for (y = 0; y < 16; y++) {
+ AV_WN4PA(dst + 0, val);
+ AV_WN4PA(dst + 4, val);
+ AV_WN4PA(dst + 8, val);
+ AV_WN4PA(dst + 12, val);
+ dst += stride;
+ }
+}
+
+static void dc_129_32x32_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *left, const uint8_t *top)
+{
+ pixel *dst = (pixel *) _dst;
+ pixel4 val = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) + 1);
+ int y;
+
+ stride /= sizeof(pixel);
+ for (y = 0; y < 32; y++) {
+ AV_WN4PA(dst + 0, val);
+ AV_WN4PA(dst + 4, val);
+ AV_WN4PA(dst + 8, val);
+ AV_WN4PA(dst + 12, val);
+ AV_WN4PA(dst + 16, val);
+ AV_WN4PA(dst + 20, val);
+ AV_WN4PA(dst + 24, val);
+ AV_WN4PA(dst + 28, val);
+ dst += stride;
+ }
+}
+
+#if BIT_DEPTH != 12
+
+#if BIT_DEPTH == 8
+#define memset_bpc memset
+#else
+static inline void memset_bpc(uint16_t *dst, int val, int len) {
+ int n;
+ for (n = 0; n < len; n++) {
+ dst[n] = val;
+ }
+}
+#endif
+
+#define DST(x, y) dst[(x) + (y) * stride]
+
+static void diag_downleft_4x4_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *left, const uint8_t *_top)
+{
+ pixel *dst = (pixel *) _dst;
+ const pixel *top = (const pixel *) _top;
+ int a0 = top[0], a1 = top[1], a2 = top[2], a3 = top[3],
+ a4 = top[4], a5 = top[5], a6 = top[6], a7 = top[7];
+
+ stride /= sizeof(pixel);
+ DST(0,0) = (a0 + a1 * 2 + a2 + 2) >> 2;
+ DST(1,0) = DST(0,1) = (a1 + a2 * 2 + a3 + 2) >> 2;
+ DST(2,0) = DST(1,1) = DST(0,2) = (a2 + a3 * 2 + a4 + 2) >> 2;
+ DST(3,0) = DST(2,1) = DST(1,2) = DST(0,3) = (a3 + a4 * 2 + a5 + 2) >> 2;
+ DST(3,1) = DST(2,2) = DST(1,3) = (a4 + a5 * 2 + a6 + 2) >> 2;
+ DST(3,2) = DST(2,3) = (a5 + a6 * 2 + a7 + 2) >> 2;
+ DST(3,3) = a7; // note: this is different from vp8 and such
+}
+
+// Generic NxN diagonal down-left prediction (N = 8/16/32): precompute the
+// smoothed top row into v[], then each output row j copies a window of v[]
+// shifted one pixel left and pads the tail with the last top pixel
+// (the same top[size-1] replication as DST(3,3) in the 4x4 case above).
+#define def_diag_downleft(size) \
+static void diag_downleft_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \
+ const uint8_t *left, const uint8_t *_top) \
+{ \
+ pixel *dst = (pixel *) _dst; \
+ const pixel *top = (const pixel *) _top; \
+ int i, j; \
+ pixel v[size - 1]; \
+\
+ stride /= sizeof(pixel); \
+ for (i = 0; i < size - 2; i++) \
+ v[i] = (top[i] + top[i + 1] * 2 + top[i + 2] + 2) >> 2; \
+ v[size - 2] = (top[size - 2] + top[size - 1] * 3 + 2) >> 2; \
+\
+ for (j = 0; j < size; j++) { \
+ memcpy(dst + j*stride, v + j, (size - 1 - j) * sizeof(pixel)); \
+ memset_bpc(dst + j*stride + size - 1 - j, top[size - 1], j + 1); \
+ } \
+}
+
+def_diag_downleft(8)
+def_diag_downleft(16)
+def_diag_downleft(32)
+
+// 4x4 diagonal down-right intra prediction: diagonals parallel to the main
+// diagonal share one value, smoothed from the left column, the top-left
+// corner and the top row. Note 'left' is stored bottom-to-top (l0 = left[3]
+// is the pixel adjacent to the top-left corner).
+static void diag_downright_4x4_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *_left, const uint8_t *_top)
+{
+ pixel *dst = (pixel *) _dst;
+ const pixel *top = (const pixel *) _top;
+ const pixel *left = (const pixel *) _left;
+ int tl = top[-1], a0 = top[0], a1 = top[1], a2 = top[2], a3 = top[3],
+ l0 = left[3], l1 = left[2], l2 = left[1], l3 = left[0];
+
+ stride /= sizeof(pixel);
+ DST(0,3) = (l1 + l2 * 2 + l3 + 2) >> 2;
+ DST(0,2) = DST(1,3) = (l0 + l1 * 2 + l2 + 2) >> 2;
+ DST(0,1) = DST(1,2) = DST(2,3) = (tl + l0 * 2 + l1 + 2) >> 2;
+ DST(0,0) = DST(1,1) = DST(2,2) = DST(3,3) = (l0 + tl * 2 + a0 + 2) >> 2;
+ DST(1,0) = DST(2,1) = DST(3,2) = (tl + a0 * 2 + a1 + 2) >> 2;
+ DST(2,0) = DST(3,1) = (a0 + a1 * 2 + a2 + 2) >> 2;
+ DST(3,0) = (a1 + a2 * 2 + a3 + 2) >> 2;
+}
+
+// Generic NxN diagonal down-right prediction: build the full run of
+// 2*size-1 smoothed diagonal values in v[] (left column, top-left corner,
+// top row), then each row j is a size-wide window of v[] starting one
+// element earlier than the previous row.
+#define def_diag_downright(size) \
+static void diag_downright_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \
+ const uint8_t *_left, const uint8_t *_top) \
+{ \
+ pixel *dst = (pixel *) _dst; \
+ const pixel *top = (const pixel *) _top; \
+ const pixel *left = (const pixel *) _left; \
+ int i, j; \
+ pixel v[size + size - 1]; \
+\
+ stride /= sizeof(pixel); \
+ for (i = 0; i < size - 2; i++) { \
+ v[i ] = (left[i] + left[i + 1] * 2 + left[i + 2] + 2) >> 2; \
+ v[size + 1 + i] = (top[i] + top[i + 1] * 2 + top[i + 2] + 2) >> 2; \
+ } \
+ v[size - 2] = (left[size - 2] + left[size - 1] * 2 + top[-1] + 2) >> 2; \
+ v[size - 1] = (left[size - 1] + top[-1] * 2 + top[ 0] + 2) >> 2; \
+ v[size ] = (top[-1] + top[0] * 2 + top[ 1] + 2) >> 2; \
+\
+ for (j = 0; j < size; j++) \
+ memcpy(dst + j*stride, v + size - 1 - j, size * sizeof(pixel)); \
+}
+
+def_diag_downright(8)
+def_diag_downright(16)
+def_diag_downright(32)
+
+// 4x4 vertical-right intra prediction: prediction direction roughly one
+// pixel right per two rows. Even rows use 2-tap averages, odd rows use
+// 3-tap smoothed values; again 'left' is indexed bottom-to-top.
+static void vert_right_4x4_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *_left, const uint8_t *_top)
+{
+ pixel *dst = (pixel *) _dst;
+ const pixel *top = (const pixel *) _top;
+ const pixel *left = (const pixel *) _left;
+ int tl = top[-1], a0 = top[0], a1 = top[1], a2 = top[2], a3 = top[3],
+ l0 = left[3], l1 = left[2], l2 = left[1];
+
+ stride /= sizeof(pixel);
+ DST(0,3) = (l0 + l1 * 2 + l2 + 2) >> 2;
+ DST(0,2) = (tl + l0 * 2 + l1 + 2) >> 2;
+ DST(0,0) = DST(1,2) = (tl + a0 + 1) >> 1;
+ DST(0,1) = DST(1,3) = (l0 + tl * 2 + a0 + 2) >> 2;
+ DST(1,0) = DST(2,2) = (a0 + a1 + 1) >> 1;
+ DST(1,1) = DST(2,3) = (tl + a0 * 2 + a1 + 2) >> 2;
+ DST(2,0) = DST(3,2) = (a1 + a2 + 1) >> 1;
+ DST(2,1) = DST(3,3) = (a0 + a1 * 2 + a2 + 2) >> 2;
+ DST(3,0) = (a2 + a3 + 1) >> 1;
+ DST(3,1) = (a1 + a2 * 2 + a3 + 2) >> 2;
+}
+
+// Generic NxN vertical-right prediction: precompute two value runs —
+// ve[] (2-tap averages, used on even rows) and vo[] (3-tap smoothed,
+// used on odd rows) — spanning the left column, corner and top row;
+// each row pair copies a window shifted one element per two rows.
+#define def_vert_right(size) \
+static void vert_right_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \
+ const uint8_t *_left, const uint8_t *_top) \
+{ \
+ pixel *dst = (pixel *) _dst; \
+ const pixel *top = (const pixel *) _top; \
+ const pixel *left = (const pixel *) _left; \
+ int i, j; \
+ pixel ve[size + size/2 - 1], vo[size + size/2 - 1]; \
+\
+ stride /= sizeof(pixel); \
+ for (i = 0; i < size/2 - 2; i++) { \
+ vo[i] = (left[i*2 + 3] + left[i*2 + 2] * 2 + left[i*2 + 1] + 2) >> 2; \
+ ve[i] = (left[i*2 + 4] + left[i*2 + 3] * 2 + left[i*2 + 2] + 2) >> 2; \
+ } \
+ vo[size/2 - 2] = (left[size - 1] + left[size - 2] * 2 + left[size - 3] + 2) >> 2; \
+ ve[size/2 - 2] = (top[-1] + left[size - 1] * 2 + left[size - 2] + 2) >> 2; \
+\
+ ve[size/2 - 1] = (top[-1] + top[0] + 1) >> 1; \
+ vo[size/2 - 1] = (left[size - 1] + top[-1] * 2 + top[0] + 2) >> 2; \
+ for (i = 0; i < size - 1; i++) { \
+ ve[size/2 + i] = (top[i] + top[i + 1] + 1) >> 1; \
+ vo[size/2 + i] = (top[i - 1] + top[i] * 2 + top[i + 1] + 2) >> 2; \
+ } \
+\
+ for (j = 0; j < size / 2; j++) { \
+ memcpy(dst + j*2 *stride, ve + size/2 - 1 - j, size * sizeof(pixel)); \
+ memcpy(dst + (j*2 + 1)*stride, vo + size/2 - 1 - j, size * sizeof(pixel)); \
+ } \
+}
+
+def_vert_right(8)
+def_vert_right(16)
+def_vert_right(32)
+
+// 4x4 horizontal-down intra prediction: the transpose-like counterpart of
+// vertical-right — direction roughly one pixel down per two columns.
+// Even columns get 2-tap averages, odd columns get 3-tap smoothed values.
+static void hor_down_4x4_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *_left, const uint8_t *_top)
+{
+ pixel *dst = (pixel *) _dst;
+ const pixel *top = (const pixel *) _top;
+ const pixel *left = (const pixel *) _left;
+ int l0 = left[3], l1 = left[2], l2 = left[1], l3 = left[0],
+ tl = top[-1], a0 = top[0], a1 = top[1], a2 = top[2];
+
+ stride /= sizeof(pixel);
+ DST(2,0) = (tl + a0 * 2 + a1 + 2) >> 2;
+ DST(3,0) = (a0 + a1 * 2 + a2 + 2) >> 2;
+ DST(0,0) = DST(2,1) = (tl + l0 + 1) >> 1;
+ DST(1,0) = DST(3,1) = (a0 + tl * 2 + l0 + 2) >> 2;
+ DST(0,1) = DST(2,2) = (l0 + l1 + 1) >> 1;
+ DST(1,1) = DST(3,2) = (tl + l0 * 2 + l1 + 2) >> 2;
+ DST(0,2) = DST(2,3) = (l1 + l2 + 1) >> 1;
+ DST(1,2) = DST(3,3) = (l0 + l1 * 2 + l2 + 2) >> 2;
+ DST(0,3) = (l2 + l3 + 1) >> 1;
+ DST(1,3) = (l1 + l2 * 2 + l3 + 2) >> 2;
+}
+
+// Generic NxN horizontal-down prediction: v[] interleaves 2-tap averages
+// (even indices) and 3-tap smoothed values (odd indices) along the left
+// column, then continues with smoothed top-row values; each output row is
+// a size-wide window of v[] stepping back two elements per row.
+#define def_hor_down(size) \
+static void hor_down_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \
+ const uint8_t *_left, const uint8_t *_top) \
+{ \
+ pixel *dst = (pixel *) _dst; \
+ const pixel *top = (const pixel *) _top; \
+ const pixel *left = (const pixel *) _left; \
+ int i, j; \
+ pixel v[size * 3 - 2]; \
+\
+ stride /= sizeof(pixel); \
+ for (i = 0; i < size - 2; i++) { \
+ v[i*2 ] = (left[i + 1] + left[i + 0] + 1) >> 1; \
+ v[i*2 + 1] = (left[i + 2] + left[i + 1] * 2 + left[i + 0] + 2) >> 2; \
+ v[size*2 + i] = (top[i - 1] + top[i] * 2 + top[i + 1] + 2) >> 2; \
+ } \
+ v[size*2 - 2] = (top[-1] + left[size - 1] + 1) >> 1; \
+ v[size*2 - 4] = (left[size - 1] + left[size - 2] + 1) >> 1; \
+ v[size*2 - 1] = (top[0] + top[-1] * 2 + left[size - 1] + 2) >> 2; \
+ v[size*2 - 3] = (top[-1] + left[size - 1] * 2 + left[size - 2] + 2) >> 2; \
+\
+ for (j = 0; j < size; j++) \
+ memcpy(dst + j*stride, v + size*2 - 2 - j*2, size * sizeof(pixel)); \
+}
+
+def_hor_down(8)
+def_hor_down(16)
+def_hor_down(32)
+
+// 4x4 vertical-left intra prediction: only the top row is used ('left' is
+// unused). Even rows are 2-tap averages of adjacent top pixels, odd rows
+// are 3-tap smoothed values, each row pair shifted one pixel left.
+static void vert_left_4x4_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *left, const uint8_t *_top)
+{
+ pixel *dst = (pixel *) _dst;
+ const pixel *top = (const pixel *) _top;
+ int a0 = top[0], a1 = top[1], a2 = top[2], a3 = top[3],
+ a4 = top[4], a5 = top[5], a6 = top[6];
+
+ stride /= sizeof(pixel);
+ DST(0,0) = (a0 + a1 + 1) >> 1;
+ DST(0,1) = (a0 + a1 * 2 + a2 + 2) >> 2;
+ DST(1,0) = DST(0,2) = (a1 + a2 + 1) >> 1;
+ DST(1,1) = DST(0,3) = (a1 + a2 * 2 + a3 + 2) >> 2;
+ DST(2,0) = DST(1,2) = (a2 + a3 + 1) >> 1;
+ DST(2,1) = DST(1,3) = (a2 + a3 * 2 + a4 + 2) >> 2;
+ DST(3,0) = DST(2,2) = (a3 + a4 + 1) >> 1;
+ DST(3,1) = DST(2,3) = (a3 + a4 * 2 + a5 + 2) >> 2;
+ DST(3,2) = (a4 + a5 + 1) >> 1;
+ DST(3,3) = (a4 + a5 * 2 + a6 + 2) >> 2;
+}
+
+// Generic NxN vertical-left prediction: ve[] holds 2-tap averages for even
+// rows, vo[] 3-tap smoothed values for odd rows, both over the top row
+// only; each row pair shifts one pixel left and pads the right edge by
+// replicating the last top pixel.
+#define def_vert_left(size) \
+static void vert_left_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \
+ const uint8_t *left, const uint8_t *_top) \
+{ \
+ pixel *dst = (pixel *) _dst; \
+ const pixel *top = (const pixel *) _top; \
+ int i, j; \
+ pixel ve[size - 1], vo[size - 1]; \
+\
+ stride /= sizeof(pixel); \
+ for (i = 0; i < size - 2; i++) { \
+ ve[i] = (top[i] + top[i + 1] + 1) >> 1; \
+ vo[i] = (top[i] + top[i + 1] * 2 + top[i + 2] + 2) >> 2; \
+ } \
+ ve[size - 2] = (top[size - 2] + top[size - 1] + 1) >> 1; \
+ vo[size - 2] = (top[size - 2] + top[size - 1] * 3 + 2) >> 2; \
+\
+ for (j = 0; j < size / 2; j++) { \
+ memcpy(dst + j*2 * stride, ve + j, (size - j - 1) * sizeof(pixel)); \
+ memset_bpc(dst + j*2 * stride + size - j - 1, top[size - 1], j + 1); \
+ memcpy(dst + (j*2 + 1) * stride, vo + j, (size - j - 1) * sizeof(pixel)); \
+ memset_bpc(dst + (j*2 + 1) * stride + size - j - 1, top[size - 1], j + 1); \
+ } \
+}
+
+def_vert_left(8)
+def_vert_left(16)
+def_vert_left(32)
+
+// 4x4 horizontal-up intra prediction: only the left column is used ('top'
+// is unused; here left is indexed top-to-bottom). Even columns are 2-tap
+// averages, odd columns 3-tap smoothed; once the predictor runs past the
+// last left pixel, the remainder is filled with l3.
+static void hor_up_4x4_c(uint8_t *_dst, ptrdiff_t stride,
+ const uint8_t *_left, const uint8_t *top)
+{
+ pixel *dst = (pixel *) _dst;
+ const pixel *left = (const pixel *) _left;
+ int l0 = left[0], l1 = left[1], l2 = left[2], l3 = left[3];
+
+ stride /= sizeof(pixel);
+ DST(0,0) = (l0 + l1 + 1) >> 1;
+ DST(1,0) = (l0 + l1 * 2 + l2 + 2) >> 2;
+ DST(0,1) = DST(2,0) = (l1 + l2 + 1) >> 1;
+ DST(1,1) = DST(3,0) = (l1 + l2 * 2 + l3 + 2) >> 2;
+ DST(0,2) = DST(2,1) = (l2 + l3 + 1) >> 1;
+ DST(1,2) = DST(3,1) = (l2 + l3 * 3 + 2) >> 2;
+ DST(0,3) = DST(1,3) = DST(2,2) = DST(2,3) = DST(3,2) = DST(3,3) = l3;
+}
+
+// Generic NxN horizontal-up prediction: v[] interleaves 2-tap averages and
+// 3-tap smoothed values down the left column; row j copies a window of v[]
+// advancing two elements per row, and the bottom rows pad the tail by
+// replicating the last left pixel.
+#define def_hor_up(size) \
+static void hor_up_##size##x##size##_c(uint8_t *_dst, ptrdiff_t stride, \
+ const uint8_t *_left, const uint8_t *top) \
+{ \
+ pixel *dst = (pixel *) _dst; \
+ const pixel *left = (const pixel *) _left; \
+ int i, j; \
+ pixel v[size*2 - 2]; \
+\
+ stride /= sizeof(pixel); \
+ for (i = 0; i < size - 2; i++) { \
+ v[i*2 ] = (left[i] + left[i + 1] + 1) >> 1; \
+ v[i*2 + 1] = (left[i] + left[i + 1] * 2 + left[i + 2] + 2) >> 2; \
+ } \
+ v[size*2 - 4] = (left[size - 2] + left[size - 1] + 1) >> 1; \
+ v[size*2 - 3] = (left[size - 2] + left[size - 1] * 3 + 2) >> 2; \
+\
+ for (j = 0; j < size / 2; j++) \
+ memcpy(dst + j*stride, v + j*2, size * sizeof(pixel)); \
+ for (j = size / 2; j < size; j++) { \
+ memcpy(dst + j*stride, v + j*2, (size*2 - 2 - j*2) * sizeof(pixel)); \
+ memset_bpc(dst + j*stride + size*2 - 2 - j*2, left[size - 1], \
+ 2 + j*2 - size); \
+ } \
+}
+
+def_hor_up(8)
+def_hor_up(16)
+def_hor_up(32)
+
+#undef DST
+
+#endif /* BIT_DEPTH != 12 */
+
+#if BIT_DEPTH != 8
+void ff_vp9dsp_intrapred_init_10(VP9DSPContext *dsp);
+#endif
+// The 10-bit init is the only one with external linkage; the 8- and 12-bit
+// variants are file-local (the 12-bit build reuses the 10-bit functions).
+#if BIT_DEPTH != 10
+static
+#endif
+av_cold void FUNC(ff_vp9dsp_intrapred_init)(VP9DSPContext *dsp)
+{
+// These four modes bake BIT_DEPTH-dependent constants (e.g. the 128/127/129
+// midpoint fills) into their code, so they must always come from the
+// current bit depth's compilation of this template.
+#define init_intra_pred_bd_aware(tx, sz) \
+ dsp->intra_pred[tx][TM_VP8_PRED] = tm_##sz##_c; \
+ dsp->intra_pred[tx][DC_128_PRED] = dc_128_##sz##_c; \
+ dsp->intra_pred[tx][DC_127_PRED] = dc_127_##sz##_c; \
+ dsp->intra_pred[tx][DC_129_PRED] = dc_129_##sz##_c
+
+#if BIT_DEPTH == 12
+ // At 12 bits, first install the 10-bit predictors (they operate on
+ // 16-bit pixels and are bit-depth agnostic), then override only the
+ // bd-aware modes below. This is why most predictors above are guarded
+ // by BIT_DEPTH != 12: they are shared with the 10-bit build.
+ ff_vp9dsp_intrapred_init_10(dsp);
+#define init_intra_pred(tx, sz) \
+ init_intra_pred_bd_aware(tx, sz)
+#else
+ #define init_intra_pred(tx, sz) \
+ dsp->intra_pred[tx][VERT_PRED] = vert_##sz##_c; \
+ dsp->intra_pred[tx][HOR_PRED] = hor_##sz##_c; \
+ dsp->intra_pred[tx][DC_PRED] = dc_##sz##_c; \
+ dsp->intra_pred[tx][DIAG_DOWN_LEFT_PRED] = diag_downleft_##sz##_c; \
+ dsp->intra_pred[tx][DIAG_DOWN_RIGHT_PRED] = diag_downright_##sz##_c; \
+ dsp->intra_pred[tx][VERT_RIGHT_PRED] = vert_right_##sz##_c; \
+ dsp->intra_pred[tx][HOR_DOWN_PRED] = hor_down_##sz##_c; \
+ dsp->intra_pred[tx][VERT_LEFT_PRED] = vert_left_##sz##_c; \
+ dsp->intra_pred[tx][HOR_UP_PRED] = hor_up_##sz##_c; \
+ dsp->intra_pred[tx][LEFT_DC_PRED] = dc_left_##sz##_c; \
+ dsp->intra_pred[tx][TOP_DC_PRED] = dc_top_##sz##_c; \
+ init_intra_pred_bd_aware(tx, sz)
+#endif
+
+ init_intra_pred(TX_4X4, 4x4);
+ init_intra_pred(TX_8X8, 8x8);
+ init_intra_pred(TX_16X16, 16x16);
+ init_intra_pred(TX_32X32, 32x32);
+
+#undef init_intra_pred
+#undef init_intra_pred_bd_aware
+}
+
+// Generates a full 2-D inverse transform + add function from two 1-D
+// transforms: type_a is applied to columns, type_b to rows, then the
+// result is rounded by 'bits' and added (clipped) into the destination.
+// If has_dconly and eob == 1, only the DC coefficient is nonzero, so a
+// single scaled value (two Q14 multiplies by 11585 = round(2^14/sqrt(2)))
+// is added to every pixel instead of running the full transform.
+// In both paths the coefficient buffer is zeroed afterwards, as the
+// decoder expects it cleared for the next block.
+#define itxfm_wrapper(type_a, type_b, sz, bits, has_dconly) \
+static void type_a##_##type_b##_##sz##x##sz##_add_c(uint8_t *_dst, \
+ ptrdiff_t stride, \
+ int16_t *_block, int eob) \
+{ \
+ int i, j; \
+ pixel *dst = (pixel *) _dst; \
+ dctcoef *block = (dctcoef *) _block, tmp[sz * sz], out[sz]; \
+\
+ stride /= sizeof(pixel); \
+ if (has_dconly && eob == 1) { \
+ const int t = ((((dctint) block[0] * 11585 + (1 << 13)) >> 14) \
+ * 11585 + (1 << 13)) >> 14; \
+ block[0] = 0; \
+ for (i = 0; i < sz; i++) { \
+ for (j = 0; j < sz; j++) \
+ dst[j * stride] = av_clip_pixel(dst[j * stride] + \
+ (bits ? \
+ (t + (1 << (bits - 1))) >> bits : \
+ t)); \
+ dst++; \
+ } \
+ return; \
+ } \
+\
+ for (i = 0; i < sz; i++) \
+ type_a##sz##_1d(block + i, sz, tmp + i * sz, 0); \
+ memset(block, 0, sz * sz * sizeof(*block)); \
+ for (i = 0; i < sz; i++) { \
+ type_b##sz##_1d(tmp + i, sz, out, 1); \
+ for (j = 0; j < sz; j++) \
+ dst[j * stride] = av_clip_pixel(dst[j * stride] + \
+ (bits ? \
+ (out[j] + (1 << (bits - 1))) >> bits : \
+ out[j])); \
+ dst++; \
+ } \
+}
+
+// Instantiates all four DCT/ADST combinations for one transform size;
+// only the pure DCT/DCT variant has the DC-only fast path.
+#define itxfm_wrap(sz, bits) \
+itxfm_wrapper(idct, idct, sz, bits, 1) \
+itxfm_wrapper(iadst, idct, sz, bits, 0) \
+itxfm_wrapper(idct, iadst, sz, bits, 0) \
+itxfm_wrapper(iadst, iadst, sz, bits, 0)
+
+// Read input coefficient x; 'stride' lets the same 1-D code walk either a
+// column (stride = sz, first pass) or a row (stride = sz, second pass on
+// the transposed intermediate).
+#define IN(x) ((dctint) in[(x) * stride])
+
+// 1-D 4-point inverse DCT. All multipliers are Q14 fixed-point trig
+// constants from the VP9 spec (11585 = round(2^14/sqrt(2)), 6270/15137 =
+// round(2^14 * sin/cos(pi/8))); each product is rounded back with
+// (+ (1 << 13)) >> 14.
+static av_always_inline void idct4_1d(const dctcoef *in, ptrdiff_t stride,
+ dctcoef *out, int pass)
+{
+ dctint t0, t1, t2, t3;
+
+ t0 = ((IN(0) + IN(2)) * 11585 + (1 << 13)) >> 14;
+ t1 = ((IN(0) - IN(2)) * 11585 + (1 << 13)) >> 14;
+ t2 = (IN(1) * 6270 - IN(3) * 15137 + (1 << 13)) >> 14;
+ t3 = (IN(1) * 15137 + IN(3) * 6270 + (1 << 13)) >> 14;
+
+ out[0] = t0 + t3;
+ out[1] = t1 + t2;
+ out[2] = t1 - t2;
+ out[3] = t0 - t3;
+}
+
+// 1-D 4-point inverse ADST (asymmetric DST), Q14 fixed point with the
+// same (+ (1 << 13)) >> 14 rounding as idct4_1d.
+static av_always_inline void iadst4_1d(const dctcoef *in, ptrdiff_t stride,
+ dctcoef *out, int pass)
+{
+ dctint t0, t1, t2, t3;
+
+ t0 = 5283 * IN(0) + 15212 * IN(2) + 9929 * IN(3);
+ t1 = 9929 * IN(0) - 5283 * IN(2) - 15212 * IN(3);
+ t2 = 13377 * (IN(0) - IN(2) + IN(3));
+ t3 = 13377 * IN(1);
+
+ out[0] = (t0 + t3 + (1 << 13)) >> 14;
+ out[1] = (t1 + t3 + (1 << 13)) >> 14;
+ out[2] = (t2 + (1 << 13)) >> 14;
+ out[3] = (t0 + t1 - t3 + (1 << 13)) >> 14;
+}
+
+// 4x4 transforms: final add rounds by 4 bits.
+itxfm_wrap(4, 4)
+
+// 1-D 8-point inverse DCT: an embedded 4-point even stage (t0a..t3a) plus
+// an odd stage (t4a..t7a) combined butterfly-style; Q14 fixed point.
+static av_always_inline void idct8_1d(const dctcoef *in, ptrdiff_t stride,
+ dctcoef *out, int pass)
+{
+ dctint t0, t0a, t1, t1a, t2, t2a, t3, t3a, t4, t4a, t5, t5a, t6, t6a, t7, t7a;
+
+ t0a = ((IN(0) + IN(4)) * 11585 + (1 << 13)) >> 14;
+ t1a = ((IN(0) - IN(4)) * 11585 + (1 << 13)) >> 14;
+ t2a = (IN(2) * 6270 - IN(6) * 15137 + (1 << 13)) >> 14;
+ t3a = (IN(2) * 15137 + IN(6) * 6270 + (1 << 13)) >> 14;
+ t4a = (IN(1) * 3196 - IN(7) * 16069 + (1 << 13)) >> 14;
+ t5a = (IN(5) * 13623 - IN(3) * 9102 + (1 << 13)) >> 14;
+ t6a = (IN(5) * 9102 + IN(3) * 13623 + (1 << 13)) >> 14;
+ t7a = (IN(1) * 16069 + IN(7) * 3196 + (1 << 13)) >> 14;
+
+ t0 = t0a + t3a;
+ t1 = t1a + t2a;
+ t2 = t1a - t2a;
+ t3 = t0a - t3a;
+ t4 = t4a + t5a;
+ t5a = t4a - t5a;
+ t7 = t7a + t6a;
+ t6a = t7a - t6a;
+
+ t5 = ((t6a - t5a) * 11585 + (1 << 13)) >> 14;
+ t6 = ((t6a + t5a) * 11585 + (1 << 13)) >> 14;
+
+ out[0] = t0 + t7;
+ out[1] = t1 + t6;
+ out[2] = t2 + t5;
+ out[3] = t3 + t4;
+ out[4] = t3 - t4;
+ out[5] = t2 - t5;
+ out[6] = t1 - t6;
+ out[7] = t0 - t7;
+}
+
+// 1-D 8-point inverse ADST: three butterfly/rotation stages in Q14 fixed
+// point; note the sign flips on out[1]/out[3]/out[5]/out[7] which are part
+// of the VP9 ADST definition.
+static av_always_inline void iadst8_1d(const dctcoef *in, ptrdiff_t stride,
+ dctcoef *out, int pass)
+{
+ dctint t0, t0a, t1, t1a, t2, t2a, t3, t3a, t4, t4a, t5, t5a, t6, t6a, t7, t7a;
+
+ t0a = 16305 * IN(7) + 1606 * IN(0);
+ t1a = 1606 * IN(7) - 16305 * IN(0);
+ t2a = 14449 * IN(5) + 7723 * IN(2);
+ t3a = 7723 * IN(5) - 14449 * IN(2);
+ t4a = 10394 * IN(3) + 12665 * IN(4);
+ t5a = 12665 * IN(3) - 10394 * IN(4);
+ t6a = 4756 * IN(1) + 15679 * IN(6);
+ t7a = 15679 * IN(1) - 4756 * IN(6);
+
+ t0 = (t0a + t4a + (1 << 13)) >> 14;
+ t1 = (t1a + t5a + (1 << 13)) >> 14;
+ t2 = (t2a + t6a + (1 << 13)) >> 14;
+ t3 = (t3a + t7a + (1 << 13)) >> 14;
+ t4 = (t0a - t4a + (1 << 13)) >> 14;
+ t5 = (t1a - t5a + (1 << 13)) >> 14;
+ t6 = (t2a - t6a + (1 << 13)) >> 14;
+ t7 = (t3a - t7a + (1 << 13)) >> 14;
+
+ t4a = 15137 * t4 + 6270 * t5;
+ t5a = 6270 * t4 - 15137 * t5;
+ t6a = 15137 * t7 - 6270 * t6;
+ t7a = 6270 * t7 + 15137 * t6;
+
+ out[0] = t0 + t2;
+ out[7] = -(t1 + t3);
+ t2 = t0 - t2;
+ t3 = t1 - t3;
+
+ out[1] = -((t4a + t6a + (1 << 13)) >> 14);
+ out[6] = (t5a + t7a + (1 << 13)) >> 14;
+ t6 = (t4a - t6a + (1 << 13)) >> 14;
+ t7 = (t5a - t7a + (1 << 13)) >> 14;
+
+ out[3] = -(((t2 + t3) * 11585 + (1 << 13)) >> 14);
+ out[4] = ((t2 - t3) * 11585 + (1 << 13)) >> 14;
+ out[2] = ((t6 + t7) * 11585 + (1 << 13)) >> 14;
+ out[5] = -(((t6 - t7) * 11585 + (1 << 13)) >> 14);
+}
+
+// 8x8 transforms: final add rounds by 5 bits.
+itxfm_wrap(8, 5)
+
+// 1-D 16-point inverse DCT: recursive butterfly structure — an embedded
+// 8-point even half (t0a..t7a) and an 8-value odd half (t8a..t15a) —
+// followed by rotation and merge stages; Q14 fixed point throughout.
+static av_always_inline void idct16_1d(const dctcoef *in, ptrdiff_t stride,
+ dctcoef *out, int pass)
+{
+ dctint t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15;
+ dctint t0a, t1a, t2a, t3a, t4a, t5a, t6a, t7a;
+ dctint t8a, t9a, t10a, t11a, t12a, t13a, t14a, t15a;
+
+ // stage 1: initial rotations of all 16 inputs
+ t0a = ((IN(0) + IN(8)) * 11585 + (1 << 13)) >> 14;
+ t1a = ((IN(0) - IN(8)) * 11585 + (1 << 13)) >> 14;
+ t2a = (IN(4) * 6270 - IN(12) * 15137 + (1 << 13)) >> 14;
+ t3a = (IN(4) * 15137 + IN(12) * 6270 + (1 << 13)) >> 14;
+ t4a = (IN(2) * 3196 - IN(14) * 16069 + (1 << 13)) >> 14;
+ t7a = (IN(2) * 16069 + IN(14) * 3196 + (1 << 13)) >> 14;
+ t5a = (IN(10) * 13623 - IN(6) * 9102 + (1 << 13)) >> 14;
+ t6a = (IN(10) * 9102 + IN(6) * 13623 + (1 << 13)) >> 14;
+ t8a = (IN(1) * 1606 - IN(15) * 16305 + (1 << 13)) >> 14;
+ t15a = (IN(1) * 16305 + IN(15) * 1606 + (1 << 13)) >> 14;
+ t9a = (IN(9) * 12665 - IN(7) * 10394 + (1 << 13)) >> 14;
+ t14a = (IN(9) * 10394 + IN(7) * 12665 + (1 << 13)) >> 14;
+ t10a = (IN(5) * 7723 - IN(11) * 14449 + (1 << 13)) >> 14;
+ t13a = (IN(5) * 14449 + IN(11) * 7723 + (1 << 13)) >> 14;
+ t11a = (IN(13) * 15679 - IN(3) * 4756 + (1 << 13)) >> 14;
+ t12a = (IN(13) * 4756 + IN(3) * 15679 + (1 << 13)) >> 14;
+
+ // stage 2: butterflies
+ t0 = t0a + t3a;
+ t1 = t1a + t2a;
+ t2 = t1a - t2a;
+ t3 = t0a - t3a;
+ t4 = t4a + t5a;
+ t5 = t4a - t5a;
+ t6 = t7a - t6a;
+ t7 = t7a + t6a;
+ t8 = t8a + t9a;
+ t9 = t8a - t9a;
+ t10 = t11a - t10a;
+ t11 = t11a + t10a;
+ t12 = t12a + t13a;
+ t13 = t12a - t13a;
+ t14 = t15a - t14a;
+ t15 = t15a + t14a;
+
+ // stage 3: rotations on the middle terms
+ t5a = ((t6 - t5) * 11585 + (1 << 13)) >> 14;
+ t6a = ((t6 + t5) * 11585 + (1 << 13)) >> 14;
+ t9a = ( t14 * 6270 - t9 * 15137 + (1 << 13)) >> 14;
+ t14a = ( t14 * 15137 + t9 * 6270 + (1 << 13)) >> 14;
+ t10a = (-(t13 * 15137 + t10 * 6270) + (1 << 13)) >> 14;
+ t13a = ( t13 * 6270 - t10 * 15137 + (1 << 13)) >> 14;
+
+ // stage 4: butterflies
+ t0a = t0 + t7;
+ t1a = t1 + t6a;
+ t2a = t2 + t5a;
+ t3a = t3 + t4;
+ t4 = t3 - t4;
+ t5 = t2 - t5a;
+ t6 = t1 - t6a;
+ t7 = t0 - t7;
+ t8a = t8 + t11;
+ t9 = t9a + t10a;
+ t10 = t9a - t10a;
+ t11a = t8 - t11;
+ t12a = t15 - t12;
+ t13 = t14a - t13a;
+ t14 = t14a + t13a;
+ t15a = t15 + t12;
+
+ // stage 5: final sqrt(1/2) rotations
+ t10a = ((t13 - t10) * 11585 + (1 << 13)) >> 14;
+ t13a = ((t13 + t10) * 11585 + (1 << 13)) >> 14;
+ t11 = ((t12a - t11a) * 11585 + (1 << 13)) >> 14;
+ t12 = ((t12a + t11a) * 11585 + (1 << 13)) >> 14;
+
+ // stage 6: output butterflies
+ out[ 0] = t0a + t15a;
+ out[ 1] = t1a + t14;
+ out[ 2] = t2a + t13a;
+ out[ 3] = t3a + t12;
+ out[ 4] = t4 + t11;
+ out[ 5] = t5 + t10a;
+ out[ 6] = t6 + t9;
+ out[ 7] = t7 + t8a;
+ out[ 8] = t7 - t8a;
+ out[ 9] = t6 - t9;
+ out[10] = t5 - t10a;
+ out[11] = t4 - t11;
+ out[12] = t3a - t12;
+ out[13] = t2a - t13a;
+ out[14] = t1a - t14;
+ out[15] = t0a - t15a;
+}
+
+// 1-D 16-point inverse ADST: four rotation/butterfly stages in Q14 fixed
+// point, with the alternating output sign flips required by the VP9 ADST.
+static av_always_inline void iadst16_1d(const dctcoef *in, ptrdiff_t stride,
+ dctcoef *out, int pass)
+{
+ dctint t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15;
+ dctint t0a, t1a, t2a, t3a, t4a, t5a, t6a, t7a;
+ dctint t8a, t9a, t10a, t11a, t12a, t13a, t14a, t15a;
+
+ // stage 1: paired rotations of all inputs (products kept at full
+ // precision; rounding happens in stage 2)
+ t0 = IN(15) * 16364 + IN(0) * 804;
+ t1 = IN(15) * 804 - IN(0) * 16364;
+ t2 = IN(13) * 15893 + IN(2) * 3981;
+ t3 = IN(13) * 3981 - IN(2) * 15893;
+ t4 = IN(11) * 14811 + IN(4) * 7005;
+ t5 = IN(11) * 7005 - IN(4) * 14811;
+ t6 = IN(9) * 13160 + IN(6) * 9760;
+ t7 = IN(9) * 9760 - IN(6) * 13160;
+ t8 = IN(7) * 11003 + IN(8) * 12140;
+ t9 = IN(7) * 12140 - IN(8) * 11003;
+ t10 = IN(5) * 8423 + IN(10) * 14053;
+ t11 = IN(5) * 14053 - IN(10) * 8423;
+ t12 = IN(3) * 5520 + IN(12) * 15426;
+ t13 = IN(3) * 15426 - IN(12) * 5520;
+ t14 = IN(1) * 2404 + IN(14) * 16207;
+ t15 = IN(1) * 16207 - IN(14) * 2404;
+
+ // stage 2: butterflies with rounding back to Q0
+ t0a = (t0 + t8 + (1 << 13)) >> 14;
+ t1a = (t1 + t9 + (1 << 13)) >> 14;
+ t2a = (t2 + t10 + (1 << 13)) >> 14;
+ t3a = (t3 + t11 + (1 << 13)) >> 14;
+ t4a = (t4 + t12 + (1 << 13)) >> 14;
+ t5a = (t5 + t13 + (1 << 13)) >> 14;
+ t6a = (t6 + t14 + (1 << 13)) >> 14;
+ t7a = (t7 + t15 + (1 << 13)) >> 14;
+ t8a = (t0 - t8 + (1 << 13)) >> 14;
+ t9a = (t1 - t9 + (1 << 13)) >> 14;
+ t10a = (t2 - t10 + (1 << 13)) >> 14;
+ t11a = (t3 - t11 + (1 << 13)) >> 14;
+ t12a = (t4 - t12 + (1 << 13)) >> 14;
+ t13a = (t5 - t13 + (1 << 13)) >> 14;
+ t14a = (t6 - t14 + (1 << 13)) >> 14;
+ t15a = (t7 - t15 + (1 << 13)) >> 14;
+
+ // stage 3: rotations on the high half
+ t8 = t8a * 16069 + t9a * 3196;
+ t9 = t8a * 3196 - t9a * 16069;
+ t10 = t10a * 9102 + t11a * 13623;
+ t11 = t10a * 13623 - t11a * 9102;
+ t12 = t13a * 16069 - t12a * 3196;
+ t13 = t13a * 3196 + t12a * 16069;
+ t14 = t15a * 9102 - t14a * 13623;
+ t15 = t15a * 13623 + t14a * 9102;
+
+ // stage 4: butterflies
+ t0 = t0a + t4a;
+ t1 = t1a + t5a;
+ t2 = t2a + t6a;
+ t3 = t3a + t7a;
+ t4 = t0a - t4a;
+ t5 = t1a - t5a;
+ t6 = t2a - t6a;
+ t7 = t3a - t7a;
+ t8a = (t8 + t12 + (1 << 13)) >> 14;
+ t9a = (t9 + t13 + (1 << 13)) >> 14;
+ t10a = (t10 + t14 + (1 << 13)) >> 14;
+ t11a = (t11 + t15 + (1 << 13)) >> 14;
+ t12a = (t8 - t12 + (1 << 13)) >> 14;
+ t13a = (t9 - t13 + (1 << 13)) >> 14;
+ t14a = (t10 - t14 + (1 << 13)) >> 14;
+ t15a = (t11 - t15 + (1 << 13)) >> 14;
+
+ // stage 5: pi/8 rotations
+ t4a = t4 * 15137 + t5 * 6270;
+ t5a = t4 * 6270 - t5 * 15137;
+ t6a = t7 * 15137 - t6 * 6270;
+ t7a = t7 * 6270 + t6 * 15137;
+ t12 = t12a * 15137 + t13a * 6270;
+ t13 = t12a * 6270 - t13a * 15137;
+ t14 = t15a * 15137 - t14a * 6270;
+ t15 = t15a * 6270 + t14a * 15137;
+
+ // stage 6: outputs (with sign pattern) and remaining intermediates
+ out[ 0] = t0 + t2;
+ out[15] = -(t1 + t3);
+ t2a = t0 - t2;
+ t3a = t1 - t3;
+ out[ 3] = -((t4a + t6a + (1 << 13)) >> 14);
+ out[12] = (t5a + t7a + (1 << 13)) >> 14;
+ t6 = (t4a - t6a + (1 << 13)) >> 14;
+ t7 = (t5a - t7a + (1 << 13)) >> 14;
+ out[ 1] = -(t8a + t10a);
+ out[14] = t9a + t11a;
+ t10 = t8a - t10a;
+ t11 = t9a - t11a;
+ out[ 2] = (t12 + t14 + (1 << 13)) >> 14;
+ out[13] = -((t13 + t15 + (1 << 13)) >> 14);
+ t14a = (t12 - t14 + (1 << 13)) >> 14;
+ t15a = (t13 - t15 + (1 << 13)) >> 14;
+
+ // stage 7: final sqrt(1/2) rotations
+ out[ 7] = ((t2a + t3a) * -11585 + (1 << 13)) >> 14;
+ out[ 8] = ((t2a - t3a) * 11585 + (1 << 13)) >> 14;
+ out[ 4] = ((t7 + t6) * 11585 + (1 << 13)) >> 14;
+ out[11] = ((t7 - t6) * 11585 + (1 << 13)) >> 14;
+ out[ 6] = ((t11 + t10) * 11585 + (1 << 13)) >> 14;
+ out[ 9] = ((t11 - t10) * 11585 + (1 << 13)) >> 14;
+ out[ 5] = ((t14a + t15a) * -11585 + (1 << 13)) >> 14;
+ out[10] = ((t14a - t15a) * 11585 + (1 << 13)) >> 14;
+}
+
+// 16x16 transforms: final add rounds by 6 bits.
+itxfm_wrap(16, 6)
+
+// 1-D 32-point inverse DCT: extends the 16-point structure with a 16-value
+// odd half (t16a..t31a); seven butterfly/rotation stages, Q14 fixed point.
+static av_always_inline void idct32_1d(const dctcoef *in, ptrdiff_t stride,
+ dctcoef *out, int pass)
+{
+ // stage 1: initial rotations of all 32 inputs
+ dctint t0a = ((IN(0) + IN(16)) * 11585 + (1 << 13)) >> 14;
+ dctint t1a = ((IN(0) - IN(16)) * 11585 + (1 << 13)) >> 14;
+ dctint t2a = (IN( 8) * 6270 - IN(24) * 15137 + (1 << 13)) >> 14;
+ dctint t3a = (IN( 8) * 15137 + IN(24) * 6270 + (1 << 13)) >> 14;
+ dctint t4a = (IN( 4) * 3196 - IN(28) * 16069 + (1 << 13)) >> 14;
+ dctint t7a = (IN( 4) * 16069 + IN(28) * 3196 + (1 << 13)) >> 14;
+ dctint t5a = (IN(20) * 13623 - IN(12) * 9102 + (1 << 13)) >> 14;
+ dctint t6a = (IN(20) * 9102 + IN(12) * 13623 + (1 << 13)) >> 14;
+ dctint t8a = (IN( 2) * 1606 - IN(30) * 16305 + (1 << 13)) >> 14;
+ dctint t15a = (IN( 2) * 16305 + IN(30) * 1606 + (1 << 13)) >> 14;
+ dctint t9a = (IN(18) * 12665 - IN(14) * 10394 + (1 << 13)) >> 14;
+ dctint t14a = (IN(18) * 10394 + IN(14) * 12665 + (1 << 13)) >> 14;
+ dctint t10a = (IN(10) * 7723 - IN(22) * 14449 + (1 << 13)) >> 14;
+ dctint t13a = (IN(10) * 14449 + IN(22) * 7723 + (1 << 13)) >> 14;
+ dctint t11a = (IN(26) * 15679 - IN( 6) * 4756 + (1 << 13)) >> 14;
+ dctint t12a = (IN(26) * 4756 + IN( 6) * 15679 + (1 << 13)) >> 14;
+ dctint t16a = (IN( 1) * 804 - IN(31) * 16364 + (1 << 13)) >> 14;
+ dctint t31a = (IN( 1) * 16364 + IN(31) * 804 + (1 << 13)) >> 14;
+ dctint t17a = (IN(17) * 12140 - IN(15) * 11003 + (1 << 13)) >> 14;
+ dctint t30a = (IN(17) * 11003 + IN(15) * 12140 + (1 << 13)) >> 14;
+ dctint t18a = (IN( 9) * 7005 - IN(23) * 14811 + (1 << 13)) >> 14;
+ dctint t29a = (IN( 9) * 14811 + IN(23) * 7005 + (1 << 13)) >> 14;
+ dctint t19a = (IN(25) * 15426 - IN( 7) * 5520 + (1 << 13)) >> 14;
+ dctint t28a = (IN(25) * 5520 + IN( 7) * 15426 + (1 << 13)) >> 14;
+ dctint t20a = (IN( 5) * 3981 - IN(27) * 15893 + (1 << 13)) >> 14;
+ dctint t27a = (IN( 5) * 15893 + IN(27) * 3981 + (1 << 13)) >> 14;
+ dctint t21a = (IN(21) * 14053 - IN(11) * 8423 + (1 << 13)) >> 14;
+ dctint t26a = (IN(21) * 8423 + IN(11) * 14053 + (1 << 13)) >> 14;
+ dctint t22a = (IN(13) * 9760 - IN(19) * 13160 + (1 << 13)) >> 14;
+ dctint t25a = (IN(13) * 13160 + IN(19) * 9760 + (1 << 13)) >> 14;
+ dctint t23a = (IN(29) * 16207 - IN( 3) * 2404 + (1 << 13)) >> 14;
+ dctint t24a = (IN(29) * 2404 + IN( 3) * 16207 + (1 << 13)) >> 14;
+
+ // stage 2: butterflies
+ dctint t0 = t0a + t3a;
+ dctint t1 = t1a + t2a;
+ dctint t2 = t1a - t2a;
+ dctint t3 = t0a - t3a;
+ dctint t4 = t4a + t5a;
+ dctint t5 = t4a - t5a;
+ dctint t6 = t7a - t6a;
+ dctint t7 = t7a + t6a;
+ dctint t8 = t8a + t9a;
+ dctint t9 = t8a - t9a;
+ dctint t10 = t11a - t10a;
+ dctint t11 = t11a + t10a;
+ dctint t12 = t12a + t13a;
+ dctint t13 = t12a - t13a;
+ dctint t14 = t15a - t14a;
+ dctint t15 = t15a + t14a;
+ dctint t16 = t16a + t17a;
+ dctint t17 = t16a - t17a;
+ dctint t18 = t19a - t18a;
+ dctint t19 = t19a + t18a;
+ dctint t20 = t20a + t21a;
+ dctint t21 = t20a - t21a;
+ dctint t22 = t23a - t22a;
+ dctint t23 = t23a + t22a;
+ dctint t24 = t24a + t25a;
+ dctint t25 = t24a - t25a;
+ dctint t26 = t27a - t26a;
+ dctint t27 = t27a + t26a;
+ dctint t28 = t28a + t29a;
+ dctint t29 = t28a - t29a;
+ dctint t30 = t31a - t30a;
+ dctint t31 = t31a + t30a;
+
+ // stage 3: rotations
+ t5a = ((t6 - t5) * 11585 + (1 << 13)) >> 14;
+ t6a = ((t6 + t5) * 11585 + (1 << 13)) >> 14;
+ t9a = ( t14 * 6270 - t9 * 15137 + (1 << 13)) >> 14;
+ t14a = ( t14 * 15137 + t9 * 6270 + (1 << 13)) >> 14;
+ t10a = (-(t13 * 15137 + t10 * 6270) + (1 << 13)) >> 14;
+ t13a = ( t13 * 6270 - t10 * 15137 + (1 << 13)) >> 14;
+ t17a = ( t30 * 3196 - t17 * 16069 + (1 << 13)) >> 14;
+ t30a = ( t30 * 16069 + t17 * 3196 + (1 << 13)) >> 14;
+ t18a = (-(t29 * 16069 + t18 * 3196) + (1 << 13)) >> 14;
+ t29a = ( t29 * 3196 - t18 * 16069 + (1 << 13)) >> 14;
+ t21a = ( t26 * 13623 - t21 * 9102 + (1 << 13)) >> 14;
+ t26a = ( t26 * 9102 + t21 * 13623 + (1 << 13)) >> 14;
+ t22a = (-(t25 * 9102 + t22 * 13623) + (1 << 13)) >> 14;
+ t25a = ( t25 * 13623 - t22 * 9102 + (1 << 13)) >> 14;
+
+ // stage 4: butterflies
+ t0a = t0 + t7;
+ t1a = t1 + t6a;
+ t2a = t2 + t5a;
+ t3a = t3 + t4;
+ t4a = t3 - t4;
+ t5 = t2 - t5a;
+ t6 = t1 - t6a;
+ t7a = t0 - t7;
+ t8a = t8 + t11;
+ t9 = t9a + t10a;
+ t10 = t9a - t10a;
+ t11a = t8 - t11;
+ t12a = t15 - t12;
+ t13 = t14a - t13a;
+ t14 = t14a + t13a;
+ t15a = t15 + t12;
+ t16a = t16 + t19;
+ t17 = t17a + t18a;
+ t18 = t17a - t18a;
+ t19a = t16 - t19;
+ t20a = t23 - t20;
+ t21 = t22a - t21a;
+ t22 = t22a + t21a;
+ t23a = t23 + t20;
+ t24a = t24 + t27;
+ t25 = t25a + t26a;
+ t26 = t25a - t26a;
+ t27a = t24 - t27;
+ t28a = t31 - t28;
+ t29 = t30a - t29a;
+ t30 = t30a + t29a;
+ t31a = t31 + t28;
+
+ // stage 5: rotations
+ t10a = ((t13 - t10) * 11585 + (1 << 13)) >> 14;
+ t13a = ((t13 + t10) * 11585 + (1 << 13)) >> 14;
+ t11 = ((t12a - t11a) * 11585 + (1 << 13)) >> 14;
+ t12 = ((t12a + t11a) * 11585 + (1 << 13)) >> 14;
+ t18a = ( t29 * 6270 - t18 * 15137 + (1 << 13)) >> 14;
+ t29a = ( t29 * 15137 + t18 * 6270 + (1 << 13)) >> 14;
+ t19 = ( t28a * 6270 - t19a * 15137 + (1 << 13)) >> 14;
+ t28 = ( t28a * 15137 + t19a * 6270 + (1 << 13)) >> 14;
+ t20 = (-(t27a * 15137 + t20a * 6270) + (1 << 13)) >> 14;
+ t27 = ( t27a * 6270 - t20a * 15137 + (1 << 13)) >> 14;
+ t21a = (-(t26 * 15137 + t21 * 6270) + (1 << 13)) >> 14;
+ t26a = ( t26 * 6270 - t21 * 15137 + (1 << 13)) >> 14;
+
+ // stage 6: butterflies
+ t0 = t0a + t15a;
+ t1 = t1a + t14;
+ t2 = t2a + t13a;
+ t3 = t3a + t12;
+ t4 = t4a + t11;
+ t5a = t5 + t10a;
+ t6a = t6 + t9;
+ t7 = t7a + t8a;
+ t8 = t7a - t8a;
+ t9a = t6 - t9;
+ t10 = t5 - t10a;
+ t11a = t4a - t11;
+ t12a = t3a - t12;
+ t13 = t2a - t13a;
+ t14a = t1a - t14;
+ t15 = t0a - t15a;
+ t16 = t16a + t23a;
+ t17a = t17 + t22;
+ t18 = t18a + t21a;
+ t19a = t19 + t20;
+ t20a = t19 - t20;
+ t21 = t18a - t21a;
+ t22a = t17 - t22;
+ t23 = t16a - t23a;
+ t24 = t31a - t24a;
+ t25a = t30 - t25;
+ t26 = t29a - t26a;
+ t27a = t28 - t27;
+ t28a = t28 + t27;
+ t29 = t29a + t26a;
+ t30a = t30 + t25;
+ t31 = t31a + t24a;
+
+ // stage 7: final sqrt(1/2) rotations
+ t20 = ((t27a - t20a) * 11585 + (1 << 13)) >> 14;
+ t27 = ((t27a + t20a) * 11585 + (1 << 13)) >> 14;
+ t21a = ((t26 - t21 ) * 11585 + (1 << 13)) >> 14;
+ t26a = ((t26 + t21 ) * 11585 + (1 << 13)) >> 14;
+ t22 = ((t25a - t22a) * 11585 + (1 << 13)) >> 14;
+ t25 = ((t25a + t22a) * 11585 + (1 << 13)) >> 14;
+ t23a = ((t24 - t23 ) * 11585 + (1 << 13)) >> 14;
+ t24a = ((t24 + t23 ) * 11585 + (1 << 13)) >> 14;
+
+ // output butterflies
+ out[ 0] = t0 + t31;
+ out[ 1] = t1 + t30a;
+ out[ 2] = t2 + t29;
+ out[ 3] = t3 + t28a;
+ out[ 4] = t4 + t27;
+ out[ 5] = t5a + t26a;
+ out[ 6] = t6a + t25;
+ out[ 7] = t7 + t24a;
+ out[ 8] = t8 + t23a;
+ out[ 9] = t9a + t22;
+ out[10] = t10 + t21a;
+ out[11] = t11a + t20;
+ out[12] = t12a + t19a;
+ out[13] = t13 + t18;
+ out[14] = t14a + t17a;
+ out[15] = t15 + t16;
+ out[16] = t15 - t16;
+ out[17] = t14a - t17a;
+ out[18] = t13 - t18;
+ out[19] = t12a - t19a;
+ out[20] = t11a - t20;
+ out[21] = t10 - t21a;
+ out[22] = t9a - t22;
+ out[23] = t8 - t23a;
+ out[24] = t7 - t24a;
+ out[25] = t6a - t25;
+ out[26] = t5a - t26a;
+ out[27] = t4 - t27;
+ out[28] = t3a - t28a;
+ out[29] = t2 - t29;
+ out[30] = t1 - t30a;
+ out[31] = t0 - t31;
+}
+
+// 32x32 only exists as DCT/DCT in VP9, so a single wrapper suffices.
+itxfm_wrapper(idct, idct, 32, 6, 1)
+
+// 1-D 4-point inverse Walsh-Hadamard transform, used for lossless blocks.
+// Integer-exact (no fixed-point rounding). On the first (column) pass the
+// inputs carry a 2-bit pre-scaling that is removed here; the second (row)
+// pass reads them as-is.
+static av_always_inline void iwht4_1d(const dctcoef *in, ptrdiff_t stride,
+ dctcoef *out, int pass)
+{
+ int t0, t1, t2, t3, t4;
+
+ if (pass == 0) {
+ t0 = IN(0) >> 2;
+ t1 = IN(3) >> 2;
+ t2 = IN(1) >> 2;
+ t3 = IN(2) >> 2;
+ } else {
+ t0 = IN(0);
+ t1 = IN(3);
+ t2 = IN(1);
+ t3 = IN(2);
+ }
+
+ t0 += t2;
+ t3 -= t1;
+ t4 = (t0 - t3) >> 1;
+ t1 = t4 - t1;
+ t2 = t4 - t2;
+ t0 -= t1;
+ t3 += t2;
+
+ out[0] = t0;
+ out[1] = t1;
+ out[2] = t2;
+ out[3] = t3;
+}
+
+// Lossless 4x4: no rounding shift (bits = 0), no DC-only fast path.
+itxfm_wrapper(iwht, iwht, 4, 0, 0)
+
+#undef IN
+#undef itxfm_wrapper
+#undef itxfm_wrap
+
+// Populate the itxfm_add function table. 4x4..16x16 get all four DCT/ADST
+// combinations; 32x32 and the lossless (WHT) slot use one function for all
+// four entries since only DCT/DCT (resp. WHT) exists there.
+static av_cold void vp9dsp_itxfm_init(VP9DSPContext *dsp)
+{
+#define init_itxfm(tx, sz) \
+ dsp->itxfm_add[tx][DCT_DCT] = idct_idct_##sz##_add_c; \
+ dsp->itxfm_add[tx][DCT_ADST] = iadst_idct_##sz##_add_c; \
+ dsp->itxfm_add[tx][ADST_DCT] = idct_iadst_##sz##_add_c; \
+ dsp->itxfm_add[tx][ADST_ADST] = iadst_iadst_##sz##_add_c
+
+#define init_idct(tx, nm) \
+ dsp->itxfm_add[tx][DCT_DCT] = \
+ dsp->itxfm_add[tx][ADST_DCT] = \
+ dsp->itxfm_add[tx][DCT_ADST] = \
+ dsp->itxfm_add[tx][ADST_ADST] = nm##_add_c
+
+ init_itxfm(TX_4X4, 4x4);
+ init_itxfm(TX_8X8, 8x8);
+ init_itxfm(TX_16X16, 16x16);
+ init_idct(TX_32X32, idct_idct_32x32);
+ init_idct(4 /* lossless */, iwht_iwht_4x4);
+
+#undef init_itxfm
+#undef init_idct
+}
+
+static av_always_inline void loop_filter(pixel *dst, int E, int I, int H,
+ ptrdiff_t stridea, ptrdiff_t strideb,
+ int wd)
+{
+ int i, F = 1 << (BIT_DEPTH - 8);
+
+ E <<= (BIT_DEPTH - 8);
+ I <<= (BIT_DEPTH - 8);
+ H <<= (BIT_DEPTH - 8);
+ for (i = 0; i < 8; i++, dst += stridea) {
+ int p7, p6, p5, p4;
+ int p3 = dst[strideb * -4], p2 = dst[strideb * -3];
+ int p1 = dst[strideb * -2], p0 = dst[strideb * -1];
+ int q0 = dst[strideb * +0], q1 = dst[strideb * +1];
+ int q2 = dst[strideb * +2], q3 = dst[strideb * +3];
+ int q4, q5, q6, q7;
+ int fm = FFABS(p3 - p2) <= I && FFABS(p2 - p1) <= I &&
+ FFABS(p1 - p0) <= I && FFABS(q1 - q0) <= I &&
+ FFABS(q2 - q1) <= I && FFABS(q3 - q2) <= I &&
+ FFABS(p0 - q0) * 2 + (FFABS(p1 - q1) >> 1) <= E;
+ int flat8out, flat8in;
+
+ if (!fm)
+ continue;
+
+ if (wd >= 16) {
+ p7 = dst[strideb * -8];
+ p6 = dst[strideb * -7];
+ p5 = dst[strideb * -6];
+ p4 = dst[strideb * -5];
+ q4 = dst[strideb * +4];
+ q5 = dst[strideb * +5];
+ q6 = dst[strideb * +6];
+ q7 = dst[strideb * +7];
+
+ flat8out = FFABS(p7 - p0) <= F && FFABS(p6 - p0) <= F &&
+ FFABS(p5 - p0) <= F && FFABS(p4 - p0) <= F &&
+ FFABS(q4 - q0) <= F && FFABS(q5 - q0) <= F &&
+ FFABS(q6 - q0) <= F && FFABS(q7 - q0) <= F;
+ }
+
+ if (wd >= 8)
+ flat8in = FFABS(p3 - p0) <= F && FFABS(p2 - p0) <= F &&
+ FFABS(p1 - p0) <= F && FFABS(q1 - q0) <= F &&
+ FFABS(q2 - q0) <= F && FFABS(q3 - q0) <= F;
+
+ if (wd >= 16 && flat8out && flat8in) {
+ dst[strideb * -7] = (p7 + p7 + p7 + p7 + p7 + p7 + p7 + p6 * 2 +
+ p5 + p4 + p3 + p2 + p1 + p0 + q0 + 8) >> 4;
+ dst[strideb * -6] = (p7 + p7 + p7 + p7 + p7 + p7 + p6 + p5 * 2 +
+ p4 + p3 + p2 + p1 + p0 + q0 + q1 + 8) >> 4;
+ dst[strideb * -5] = (p7 + p7 + p7 + p7 + p7 + p6 + p5 + p4 * 2 +
+ p3 + p2 + p1 + p0 + q0 + q1 + q2 + 8) >> 4;
+ dst[strideb * -4] = (p7 + p7 + p7 + p7 + p6 + p5 + p4 + p3 * 2 +
+ p2 + p1 + p0 + q0 + q1 + q2 + q3 + 8) >> 4;
+ dst[strideb * -3] = (p7 + p7 + p7 + p6 + p5 + p4 + p3 + p2 * 2 +
+ p1 + p0 + q0 + q1 + q2 + q3 + q4 + 8) >> 4;
+ dst[strideb * -2] = (p7 + p7 + p6 + p5 + p4 + p3 + p2 + p1 * 2 +
+ p0 + q0 + q1 + q2 + q3 + q4 + q5 + 8) >> 4;
+ dst[strideb * -1] = (p7 + p6 + p5 + p4 + p3 + p2 + p1 + p0 * 2 +
+ q0 + q1 + q2 + q3 + q4 + q5 + q6 + 8) >> 4;
+ dst[strideb * +0] = (p6 + p5 + p4 + p3 + p2 + p1 + p0 + q0 * 2 +
+ q1 + q2 + q3 + q4 + q5 + q6 + q7 + 8) >> 4;
+ dst[strideb * +1] = (p5 + p4 + p3 + p2 + p1 + p0 + q0 + q1 * 2 +
+ q2 + q3 + q4 + q5 + q6 + q7 + q7 + 8) >> 4;
+ dst[strideb * +2] = (p4 + p3 + p2 + p1 + p0 + q0 + q1 + q2 * 2 +
+ q3 + q4 + q5 + q6 + q7 + q7 + q7 + 8) >> 4;
+ dst[strideb * +3] = (p3 + p2 + p1 + p0 + q0 + q1 + q2 + q3 * 2 +
+ q4 + q5 + q6 + q7 + q7 + q7 + q7 + 8) >> 4;
+ dst[strideb * +4] = (p2 + p1 + p0 + q0 + q1 + q2 + q3 + q4 * 2 +
+ q5 + q6 + q7 + q7 + q7 + q7 + q7 + 8) >> 4;
+ dst[strideb * +5] = (p1 + p0 + q0 + q1 + q2 + q3 + q4 + q5 * 2 +
+ q6 + q7 + q7 + q7 + q7 + q7 + q7 + 8) >> 4;
+ dst[strideb * +6] = (p0 + q0 + q1 + q2 + q3 + q4 + q5 + q6 * 2 +
+ q7 + q7 + q7 + q7 + q7 + q7 + q7 + 8) >> 4;
+ } else if (wd >= 8 && flat8in) {
+ dst[strideb * -3] = (p3 + p3 + p3 + 2 * p2 + p1 + p0 + q0 + 4) >> 3;
+ dst[strideb * -2] = (p3 + p3 + p2 + 2 * p1 + p0 + q0 + q1 + 4) >> 3;
+ dst[strideb * -1] = (p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2 + 4) >> 3;
+ dst[strideb * +0] = (p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3 + 4) >> 3;
+ dst[strideb * +1] = (p1 + p0 + q0 + 2 * q1 + q2 + q3 + q3 + 4) >> 3;
+ dst[strideb * +2] = (p0 + q0 + q1 + 2 * q2 + q3 + q3 + q3 + 4) >> 3;
+ } else {
+ int hev = FFABS(p1 - p0) > H || FFABS(q1 - q0) > H;
+
+ if (hev) {
+ int f = av_clip_intp2(p1 - q1, BIT_DEPTH - 1), f1, f2;
+ f = av_clip_intp2(3 * (q0 - p0) + f, BIT_DEPTH - 1);
+
+ f1 = FFMIN(f + 4, (1 << (BIT_DEPTH - 1)) - 1) >> 3;
+ f2 = FFMIN(f + 3, (1 << (BIT_DEPTH - 1)) - 1) >> 3;
+
+ dst[strideb * -1] = av_clip_pixel(p0 + f2);
+ dst[strideb * +0] = av_clip_pixel(q0 - f1);
+ } else {
+ int f = av_clip_intp2(3 * (q0 - p0), BIT_DEPTH - 1), f1, f2;
+
+ f1 = FFMIN(f + 4, (1 << (BIT_DEPTH - 1)) - 1) >> 3;
+ f2 = FFMIN(f + 3, (1 << (BIT_DEPTH - 1)) - 1) >> 3;
+
+ dst[strideb * -1] = av_clip_pixel(p0 + f2);
+ dst[strideb * +0] = av_clip_pixel(q0 - f1);
+
+ f = (f1 + 1) >> 1;
+ dst[strideb * -2] = av_clip_pixel(p1 + f);
+ dst[strideb * +1] = av_clip_pixel(q1 - f);
+ }
+ }
+ }
+}
+
+/* Wrappers binding the shared loop_filter() template above to a concrete
+ * edge direction and filter width. "h" variants walk 8 rows of a vertical
+ * edge (samples across the edge are 1 apart), "v" variants walk 8 columns
+ * of a horizontal edge (samples across the edge are `stride` apart). */
+#define lf_8_fn(dir, wd, stridea, strideb) \
+static void loop_filter_##dir##_##wd##_8_c(uint8_t *_dst, \
+ ptrdiff_t stride, \
+ int E, int I, int H) \
+{ \
+ pixel *dst = (pixel *) _dst; \
+ stride /= sizeof(pixel); \
+ loop_filter(dst, E, I, H, stridea, strideb, wd); \
+}
+
+#define lf_8_fns(wd) \
+lf_8_fn(h, wd, stride, 1) \
+lf_8_fn(v, wd, 1, stride)
+
+/* Instantiate 4-, 8- and 16-tap-width filters for both directions. */
+lf_8_fns(4)
+lf_8_fns(8)
+lf_8_fns(16)
+
+#undef lf_8_fn
+#undef lf_8_fns
+
+/* 16-pixel-long edge: apply the 8-pixel wd=16 filter to both halves. */
+#define lf_16_fn(dir, stridea) \
+static void loop_filter_##dir##_16_16_c(uint8_t *dst, \
+ ptrdiff_t stride, \
+ int E, int I, int H) \
+{ \
+ loop_filter_##dir##_16_8_c(dst, stride, E, I, H); \
+ loop_filter_##dir##_16_8_c(dst + 8 * stridea, stride, E, I, H); \
+}
+
+lf_16_fn(h, stride)
+lf_16_fn(v, sizeof(pixel))
+
+#undef lf_16_fn
+
+/* Mixed-width 16-pixel edge: the low byte of E/I/H parameterizes the
+ * first 8 pixels (filter width wd1), the high byte the second 8 (wd2). */
+#define lf_mix_fn(dir, wd1, wd2, stridea) \
+static void loop_filter_##dir##_##wd1##wd2##_16_c(uint8_t *dst, \
+ ptrdiff_t stride, \
+ int E, int I, int H) \
+{ \
+ loop_filter_##dir##_##wd1##_8_c(dst, stride, E & 0xff, I & 0xff, H & 0xff); \
+ loop_filter_##dir##_##wd2##_8_c(dst + 8 * stridea, stride, E >> 8, I >> 8, H >> 8); \
+}
+
+#define lf_mix_fns(wd1, wd2) \
+lf_mix_fn(h, wd1, wd2, stride) \
+lf_mix_fn(v, wd1, wd2, sizeof(pixel))
+
+lf_mix_fns(4, 4)
+lf_mix_fns(4, 8)
+lf_mix_fns(8, 4)
+lf_mix_fns(8, 8)
+
+#undef lf_mix_fn
+#undef lf_mix_fns
+
+/* Install the C loop filter implementations into the dispatch table.
+ * loop_filter_8[i]: filter width 4/8/16 for i = 0/1/2; index [..][0] is
+ * the horizontal-edge ("h") variant, [..][1] the vertical-edge ("v") one.
+ * loop_filter_mix2[a][b] selects widths (4,8)[a] x (4,8)[b] per half. */
+static av_cold void vp9dsp_loopfilter_init(VP9DSPContext *dsp)
+{
+ dsp->loop_filter_8[0][0] = loop_filter_h_4_8_c;
+ dsp->loop_filter_8[0][1] = loop_filter_v_4_8_c;
+ dsp->loop_filter_8[1][0] = loop_filter_h_8_8_c;
+ dsp->loop_filter_8[1][1] = loop_filter_v_8_8_c;
+ dsp->loop_filter_8[2][0] = loop_filter_h_16_8_c;
+ dsp->loop_filter_8[2][1] = loop_filter_v_16_8_c;
+
+ dsp->loop_filter_16[0] = loop_filter_h_16_16_c;
+ dsp->loop_filter_16[1] = loop_filter_v_16_16_c;
+
+ dsp->loop_filter_mix2[0][0][0] = loop_filter_h_44_16_c;
+ dsp->loop_filter_mix2[0][0][1] = loop_filter_v_44_16_c;
+ dsp->loop_filter_mix2[0][1][0] = loop_filter_h_48_16_c;
+ dsp->loop_filter_mix2[0][1][1] = loop_filter_v_48_16_c;
+ dsp->loop_filter_mix2[1][0][0] = loop_filter_h_84_16_c;
+ dsp->loop_filter_mix2[1][0][1] = loop_filter_v_84_16_c;
+ dsp->loop_filter_mix2[1][1][0] = loop_filter_h_88_16_c;
+ dsp->loop_filter_mix2[1][1][1] = loop_filter_v_88_16_c;
+}
+
+/* Full-pel copy/average kernels. Compiled only for the 8- and 10-bit
+ * template instantiations; the 12-bit build reuses the 10-bit versions
+ * (registered via ff_vp9dsp_mc_init_10 below), since these kernels only
+ * depend on sizeof(pixel), which is the same for 10 and 12 bit. */
+#if BIT_DEPTH != 12
+
+/* Plain block copy: w pixels per row, h rows. */
+static av_always_inline void copy_c(uint8_t *dst, ptrdiff_t dst_stride,
+ const uint8_t *src, ptrdiff_t src_stride,
+ int w, int h)
+{
+ do {
+ memcpy(dst, src, w * sizeof(pixel));
+
+ dst += dst_stride;
+ src += src_stride;
+ } while (--h);
+}
+
+/* Rounding average of src into dst, 4 pixels per step via the packed
+ * rnd_avg_pixel4 helper (w is assumed to be a multiple of 4). */
+static av_always_inline void avg_c(uint8_t *_dst, ptrdiff_t dst_stride,
+ const uint8_t *_src, ptrdiff_t src_stride,
+ int w, int h)
+{
+ pixel *dst = (pixel *) _dst;
+ const pixel *src = (const pixel *) _src;
+
+ dst_stride /= sizeof(pixel);
+ src_stride /= sizeof(pixel);
+ do {
+ int x;
+
+ for (x = 0; x < w; x += 4)
+ AV_WN4PA(&dst[x], rnd_avg_pixel4(AV_RN4PA(&dst[x]), AV_RN4P(&src[x])));
+
+ dst += dst_stride;
+ src += src_stride;
+ } while (--h);
+}
+
+/* Fixed-size entry points matching the mc[] prototype; the subpel
+ * coordinates mx/my are ignored for full-pel copy/avg. */
+#define fpel_fn(type, sz) \
+static void type##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \
+ const uint8_t *src, ptrdiff_t src_stride, \
+ int h, int mx, int my) \
+{ \
+ type##_c(dst, dst_stride, src, src_stride, sz, h); \
+}
+
+#define copy_avg_fn(sz) \
+fpel_fn(copy, sz) \
+fpel_fn(avg, sz)
+
+copy_avg_fn(64)
+copy_avg_fn(32)
+copy_avg_fn(16)
+copy_avg_fn(8)
+copy_avg_fn(4)
+
+#undef fpel_fn
+#undef copy_avg_fn
+
+#endif /* BIT_DEPTH != 12 */
+
+/* VP9 8-tap subpel filter banks: one bank per filter type, 16 subpel
+ * phases each, 8 taps per phase. Each row of taps sums to 128, matching
+ * the ">> 7" normalization in FILTER_8TAP below. Phase 0 is the identity
+ * (full-pel) filter. */
+static const int16_t vp9_subpel_filters[3][16][8] = {
+ [FILTER_8TAP_REGULAR] = {
+ { 0, 0, 0, 128, 0, 0, 0, 0 },
+ { 0, 1, -5, 126, 8, -3, 1, 0 },
+ { -1, 3, -10, 122, 18, -6, 2, 0 },
+ { -1, 4, -13, 118, 27, -9, 3, -1 },
+ { -1, 4, -16, 112, 37, -11, 4, -1 },
+ { -1, 5, -18, 105, 48, -14, 4, -1 },
+ { -1, 5, -19, 97, 58, -16, 5, -1 },
+ { -1, 6, -19, 88, 68, -18, 5, -1 },
+ { -1, 6, -19, 78, 78, -19, 6, -1 },
+ { -1, 5, -18, 68, 88, -19, 6, -1 },
+ { -1, 5, -16, 58, 97, -19, 5, -1 },
+ { -1, 4, -14, 48, 105, -18, 5, -1 },
+ { -1, 4, -11, 37, 112, -16, 4, -1 },
+ { -1, 3, -9, 27, 118, -13, 4, -1 },
+ { 0, 2, -6, 18, 122, -10, 3, -1 },
+ { 0, 1, -3, 8, 126, -5, 1, 0 },
+ }, [FILTER_8TAP_SHARP] = {
+ { 0, 0, 0, 128, 0, 0, 0, 0 },
+ { -1, 3, -7, 127, 8, -3, 1, 0 },
+ { -2, 5, -13, 125, 17, -6, 3, -1 },
+ { -3, 7, -17, 121, 27, -10, 5, -2 },
+ { -4, 9, -20, 115, 37, -13, 6, -2 },
+ { -4, 10, -23, 108, 48, -16, 8, -3 },
+ { -4, 10, -24, 100, 59, -19, 9, -3 },
+ { -4, 11, -24, 90, 70, -21, 10, -4 },
+ { -4, 11, -23, 80, 80, -23, 11, -4 },
+ { -4, 10, -21, 70, 90, -24, 11, -4 },
+ { -3, 9, -19, 59, 100, -24, 10, -4 },
+ { -3, 8, -16, 48, 108, -23, 10, -4 },
+ { -2, 6, -13, 37, 115, -20, 9, -4 },
+ { -2, 5, -10, 27, 121, -17, 7, -3 },
+ { -1, 3, -6, 17, 125, -13, 5, -2 },
+ { 0, 1, -3, 8, 127, -7, 3, -1 },
+ }, [FILTER_8TAP_SMOOTH] = {
+ { 0, 0, 0, 128, 0, 0, 0, 0 },
+ { -3, -1, 32, 64, 38, 1, -3, 0 },
+ { -2, -2, 29, 63, 41, 2, -3, 0 },
+ { -2, -2, 26, 63, 43, 4, -4, 0 },
+ { -2, -3, 24, 62, 46, 5, -4, 0 },
+ { -2, -3, 21, 60, 49, 7, -4, 0 },
+ { -1, -4, 18, 59, 51, 9, -4, 0 },
+ { -1, -4, 16, 57, 53, 12, -4, -1 },
+ { -1, -4, 14, 55, 55, 14, -4, -1 },
+ { -1, -4, 12, 53, 57, 16, -4, -1 },
+ { 0, -4, 9, 51, 59, 18, -4, -1 },
+ { 0, -4, 7, 49, 60, 21, -3, -2 },
+ { 0, -4, 5, 46, 62, 24, -3, -2 },
+ { 0, -4, 4, 43, 63, 26, -2, -2 },
+ { 0, -3, 2, 41, 63, 29, -2, -2 },
+ { 0, -3, 1, 38, 64, 32, -1, -3 },
+ }
+};
+
+/* 8-tap FIR at position x: taps F applied to src[x - 3*stride] ..
+ * src[x + 4*stride], rounded (+64) and normalized (>> 7, taps sum to
+ * 128), then clipped to the valid pixel range. */
+#define FILTER_8TAP(src, x, F, stride) \
+ av_clip_pixel((F[0] * src[x + -3 * stride] + \
+ F[1] * src[x + -2 * stride] + \
+ F[2] * src[x + -1 * stride] + \
+ F[3] * src[x + +0 * stride] + \
+ F[4] * src[x + +1 * stride] + \
+ F[5] * src[x + +2 * stride] + \
+ F[6] * src[x + +3 * stride] + \
+ F[7] * src[x + +4 * stride] + 64) >> 7)
+
+/* 1-D 8-tap filter over a w x h block. ds is the tap spacing inside a
+ * row (1 for horizontal, the pixel stride for vertical). When avg is
+ * set, the filtered result is round-averaged with the existing dst. */
+static av_always_inline void do_8tap_1d_c(uint8_t *_dst, ptrdiff_t dst_stride,
+ const uint8_t *_src, ptrdiff_t src_stride,
+ int w, int h, ptrdiff_t ds,
+ const int16_t *filter, int avg)
+{
+ pixel *dst = (pixel *) _dst;
+ const pixel *src = (const pixel *) _src;
+
+ dst_stride /= sizeof(pixel);
+ src_stride /= sizeof(pixel);
+ do {
+ int x;
+
+ for (x = 0; x < w; x++)
+ if (avg) {
+ dst[x] = (dst[x] + FILTER_8TAP(src, x, filter, ds) + 1) >> 1;
+ } else {
+ dst[x] = FILTER_8TAP(src, x, filter, ds);
+ }
+
+ dst += dst_stride;
+ src += src_stride;
+ } while (--h);
+}
+
+/* put/avg x h/v instantiations; avg is resolved at compile time through
+ * the always-inline template above. */
+#define filter_8tap_1d_fn(opn, opa, dir, ds) \
+static av_noinline void opn##_8tap_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
+ const uint8_t *src, ptrdiff_t src_stride, \
+ int w, int h, const int16_t *filter) \
+{ \
+ do_8tap_1d_c(dst, dst_stride, src, src_stride, w, h, ds, filter, opa); \
+}
+
+filter_8tap_1d_fn(put, 0, v, src_stride / sizeof(pixel))
+filter_8tap_1d_fn(put, 0, h, 1)
+filter_8tap_1d_fn(avg, 1, v, src_stride / sizeof(pixel))
+filter_8tap_1d_fn(avg, 1, h, 1)
+
+#undef filter_8tap_1d_fn
+
+/* Separable 2-D 8-tap filter: horizontal pass into a 64-pixel-wide
+ * temporary (h + 7 rows: 3 above and 4 below for the vertical taps),
+ * then vertical pass from the temporary into dst. Max block size is
+ * 64x64, hence tmp[64 * 71]. */
+static av_always_inline void do_8tap_2d_c(uint8_t *_dst, ptrdiff_t dst_stride,
+ const uint8_t *_src, ptrdiff_t src_stride,
+ int w, int h, const int16_t *filterx,
+ const int16_t *filtery, int avg)
+{
+ int tmp_h = h + 7;
+ pixel tmp[64 * 71], *tmp_ptr = tmp;
+ pixel *dst = (pixel *) _dst;
+ const pixel *src = (const pixel *) _src;
+
+ dst_stride /= sizeof(pixel);
+ src_stride /= sizeof(pixel);
+ src -= src_stride * 3;
+ do {
+ int x;
+
+ for (x = 0; x < w; x++)
+ tmp_ptr[x] = FILTER_8TAP(src, x, filterx, 1);
+
+ tmp_ptr += 64;
+ src += src_stride;
+ } while (--tmp_h);
+
+ /* Vertical pass starts 3 rows in so taps -3..+4 stay in bounds. */
+ tmp_ptr = tmp + 64 * 3;
+ do {
+ int x;
+
+ for (x = 0; x < w; x++)
+ if (avg) {
+ dst[x] = (dst[x] + FILTER_8TAP(tmp_ptr, x, filtery, 64) + 1) >> 1;
+ } else {
+ dst[x] = FILTER_8TAP(tmp_ptr, x, filtery, 64);
+ }
+
+ tmp_ptr += 64;
+ dst += dst_stride;
+ } while (--h);
+}
+
+/* put/avg entry points for the 2-D (hv) case. */
+#define filter_8tap_2d_fn(opn, opa) \
+static av_noinline void opn##_8tap_2d_hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
+ const uint8_t *src, ptrdiff_t src_stride, \
+ int w, int h, const int16_t *filterx, \
+ const int16_t *filtery) \
+{ \
+ do_8tap_2d_c(dst, dst_stride, src, src_stride, w, h, filterx, filtery, opa); \
+}
+
+filter_8tap_2d_fn(put, 0)
+filter_8tap_2d_fn(avg, 1)
+
+#undef filter_8tap_2d_fn
+
+/* Generate the fixed-size mc[] entry points. dir_m picks which subpel
+ * coordinate (mx or my) indexes the filter phase for the 1-D case. */
+#define filter_fn_1d(sz, dir, dir_m, type, type_idx, avg) \
+static void avg##_8tap_##type##_##sz##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
+ const uint8_t *src, ptrdiff_t src_stride, \
+ int h, int mx, int my) \
+{ \
+ avg##_8tap_1d_##dir##_c(dst, dst_stride, src, src_stride, sz, h, \
+ vp9_subpel_filters[type_idx][dir_m]); \
+}
+
+/* 2-D case: mx selects the horizontal phase, my the vertical one. */
+#define filter_fn_2d(sz, type, type_idx, avg) \
+static void avg##_8tap_##type##_##sz##hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
+ const uint8_t *src, ptrdiff_t src_stride, \
+ int h, int mx, int my) \
+{ \
+ avg##_8tap_2d_hv_c(dst, dst_stride, src, src_stride, sz, h, \
+ vp9_subpel_filters[type_idx][mx], \
+ vp9_subpel_filters[type_idx][my]); \
+}
+
+/* Bilinear kernels; like copy/avg these are shared between the 10- and
+ * 12-bit builds, so they are only instantiated when BIT_DEPTH != 12. */
+#if BIT_DEPTH != 12
+
+/* Linear interpolation between src[x] and src[x + stride] with 4-bit
+ * fraction mxy (0..15), rounded (+8, >> 4). */
+#define FILTER_BILIN(src, x, mxy, stride) \
+ (src[x] + ((mxy * (src[x + stride] - src[x]) + 8) >> 4))
+
+/* 1-D bilinear filter over a w x h block; ds is the sample spacing
+ * (1 = horizontal, pixel stride = vertical). avg round-averages the
+ * result with the existing dst. */
+static av_always_inline void do_bilin_1d_c(uint8_t *_dst, ptrdiff_t dst_stride,
+ const uint8_t *_src, ptrdiff_t src_stride,
+ int w, int h, ptrdiff_t ds, int mxy, int avg)
+{
+ pixel *dst = (pixel *) _dst;
+ const pixel *src = (const pixel *) _src;
+
+ dst_stride /= sizeof(pixel);
+ src_stride /= sizeof(pixel);
+ do {
+ int x;
+
+ for (x = 0; x < w; x++)
+ if (avg) {
+ dst[x] = (dst[x] + FILTER_BILIN(src, x, mxy, ds) + 1) >> 1;
+ } else {
+ dst[x] = FILTER_BILIN(src, x, mxy, ds);
+ }
+
+ dst += dst_stride;
+ src += src_stride;
+ } while (--h);
+}
+
+/* put/avg x h/v instantiations of the 1-D bilinear template. */
+#define bilin_1d_fn(opn, opa, dir, ds) \
+static av_noinline void opn##_bilin_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
+ const uint8_t *src, ptrdiff_t src_stride, \
+ int w, int h, int mxy) \
+{ \
+ do_bilin_1d_c(dst, dst_stride, src, src_stride, w, h, ds, mxy, opa); \
+}
+
+bilin_1d_fn(put, 0, v, src_stride / sizeof(pixel))
+bilin_1d_fn(put, 0, h, 1)
+bilin_1d_fn(avg, 1, v, src_stride / sizeof(pixel))
+bilin_1d_fn(avg, 1, h, 1)
+
+#undef bilin_1d_fn
+
+/* Separable 2-D bilinear filter: horizontal pass into a 64-wide
+ * temporary (h + 1 rows, one extra for the vertical interpolation),
+ * then vertical pass into dst. */
+static av_always_inline void do_bilin_2d_c(uint8_t *_dst, ptrdiff_t dst_stride,
+ const uint8_t *_src, ptrdiff_t src_stride,
+ int w, int h, int mx, int my, int avg)
+{
+ pixel tmp[64 * 65], *tmp_ptr = tmp;
+ int tmp_h = h + 1;
+ pixel *dst = (pixel *) _dst;
+ const pixel *src = (const pixel *) _src;
+
+ dst_stride /= sizeof(pixel);
+ src_stride /= sizeof(pixel);
+ do {
+ int x;
+
+ for (x = 0; x < w; x++)
+ tmp_ptr[x] = FILTER_BILIN(src, x, mx, 1);
+
+ tmp_ptr += 64;
+ src += src_stride;
+ } while (--tmp_h);
+
+ tmp_ptr = tmp;
+ do {
+ int x;
+
+ for (x = 0; x < w; x++)
+ if (avg) {
+ dst[x] = (dst[x] + FILTER_BILIN(tmp_ptr, x, my, 64) + 1) >> 1;
+ } else {
+ dst[x] = FILTER_BILIN(tmp_ptr, x, my, 64);
+ }
+
+ tmp_ptr += 64;
+ dst += dst_stride;
+ } while (--h);
+}
+
+/* put/avg entry points for the 2-D (hv) bilinear case. */
+#define bilin_2d_fn(opn, opa) \
+static av_noinline void opn##_bilin_2d_hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
+ const uint8_t *src, ptrdiff_t src_stride, \
+ int w, int h, int mx, int my) \
+{ \
+ do_bilin_2d_c(dst, dst_stride, src, src_stride, w, h, mx, my, opa); \
+}
+
+bilin_2d_fn(put, 0)
+bilin_2d_fn(avg, 1)
+
+#undef bilin_2d_fn
+
+/* Fixed-size mc[] entry points for bilinear filtering; dir_m selects
+ * mx or my as the interpolation fraction in the 1-D case. */
+#define bilinf_fn_1d(sz, dir, dir_m, avg) \
+static void avg##_bilin_##sz##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
+ const uint8_t *src, ptrdiff_t src_stride, \
+ int h, int mx, int my) \
+{ \
+ avg##_bilin_1d_##dir##_c(dst, dst_stride, src, src_stride, sz, h, dir_m); \
+}
+
+#define bilinf_fn_2d(sz, avg) \
+static void avg##_bilin_##sz##hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
+ const uint8_t *src, ptrdiff_t src_stride, \
+ int h, int mx, int my) \
+{ \
+ avg##_bilin_2d_hv_c(dst, dst_stride, src, src_stride, sz, h, mx, my); \
+}
+
+#else
+
+/* 12-bit build: expand to nothing, the 10-bit versions are reused. */
+#define bilinf_fn_1d(a, b, c, d)
+#define bilinf_fn_2d(a, b)
+
+#endif
+
+/* Instantiate every subpel MC entry point: for each block size (4..64)
+ * and op (put/avg), generate h/v/hv variants for the three 8-tap filter
+ * types plus the bilinear ones (which are empty macros at 12 bit). */
+#define filter_fn(sz, avg) \
+filter_fn_1d(sz, h, mx, regular, FILTER_8TAP_REGULAR, avg) \
+filter_fn_1d(sz, v, my, regular, FILTER_8TAP_REGULAR, avg) \
+filter_fn_2d(sz, regular, FILTER_8TAP_REGULAR, avg) \
+filter_fn_1d(sz, h, mx, smooth, FILTER_8TAP_SMOOTH, avg) \
+filter_fn_1d(sz, v, my, smooth, FILTER_8TAP_SMOOTH, avg) \
+filter_fn_2d(sz, smooth, FILTER_8TAP_SMOOTH, avg) \
+filter_fn_1d(sz, h, mx, sharp, FILTER_8TAP_SHARP, avg) \
+filter_fn_1d(sz, v, my, sharp, FILTER_8TAP_SHARP, avg) \
+filter_fn_2d(sz, sharp, FILTER_8TAP_SHARP, avg) \
+bilinf_fn_1d(sz, h, mx, avg) \
+bilinf_fn_1d(sz, v, my, avg) \
+bilinf_fn_2d(sz, avg)
+
+#define filter_fn_set(avg) \
+filter_fn(64, avg) \
+filter_fn(32, avg) \
+filter_fn(16, avg) \
+filter_fn(8, avg) \
+filter_fn(4, avg)
+
+filter_fn_set(put)
+filter_fn_set(avg)
+
+#undef filter_fn
+#undef filter_fn_set
+#undef filter_fn_1d
+#undef filter_fn_2d
+#undef bilinf_fn_1d
+#undef bilinf_fn_2d
+
+/* The 12-bit build delegates its full-pel and bilinear entries to the
+ * 10-bit instantiation, so export that initializer to it and keep the
+ * 10-bit one non-static. */
+#if BIT_DEPTH != 8
+void ff_vp9dsp_mc_init_10(VP9DSPContext *dsp);
+#endif
+#if BIT_DEPTH != 10
+static
+#endif
+/* Fill dsp->mc[size][filter][op][subpel_h][subpel_v] with the C motion
+ * compensation functions generated above. */
+av_cold void FUNC(ff_vp9dsp_mc_init)(VP9DSPContext *dsp)
+{
+#if BIT_DEPTH == 12
+ /* Full-pel copy/avg and bilinear are shared with the 10-bit build. */
+ ff_vp9dsp_mc_init_10(dsp);
+#else /* BIT_DEPTH == 12 */
+
+/* Full-pel entries are filter-independent: same function for all types. */
+#define init_fpel(idx1, idx2, sz, type) \
+ dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = type##sz##_c; \
+ dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = type##sz##_c; \
+ dsp->mc[idx1][FILTER_8TAP_SHARP ][idx2][0][0] = type##sz##_c; \
+ dsp->mc[idx1][FILTER_BILINEAR ][idx2][0][0] = type##sz##_c
+
+#define init_copy_avg(idx, sz) \
+ init_fpel(idx, 0, sz, copy); \
+ init_fpel(idx, 1, sz, avg)
+
+ init_copy_avg(0, 64);
+ init_copy_avg(1, 32);
+ init_copy_avg(2, 16);
+ init_copy_avg(3, 8);
+ init_copy_avg(4, 4);
+
+#undef init_copy_avg
+#undef init_fpel
+
+#endif /* BIT_DEPTH == 12 */
+
+/* Subpel entries: 8-tap always; bilinear only when this instantiation
+ * provides it (i.e. not the 12-bit build, which got it above). */
+#define init_subpel1_bd_aware(idx1, idx2, idxh, idxv, sz, dir, type) \
+ dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][idxh][idxv] = type##_8tap_smooth_##sz##dir##_c; \
+ dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = type##_8tap_regular_##sz##dir##_c; \
+ dsp->mc[idx1][FILTER_8TAP_SHARP ][idx2][idxh][idxv] = type##_8tap_sharp_##sz##dir##_c
+
+#if BIT_DEPTH == 12
+#define init_subpel1 init_subpel1_bd_aware
+#else
+#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type) \
+ init_subpel1_bd_aware(idx1, idx2, idxh, idxv, sz, dir, type); \
+ dsp->mc[idx1][FILTER_BILINEAR ][idx2][idxh][idxv] = type##_bilin_##sz##dir##_c
+#endif
+
+#define init_subpel2(idx, idxh, idxv, dir, type) \
+ init_subpel1(0, idx, idxh, idxv, 64, dir, type); \
+ init_subpel1(1, idx, idxh, idxv, 32, dir, type); \
+ init_subpel1(2, idx, idxh, idxv, 16, dir, type); \
+ init_subpel1(3, idx, idxh, idxv, 8, dir, type); \
+ init_subpel1(4, idx, idxh, idxv, 4, dir, type)
+
+#define init_subpel3(idx, type) \
+ init_subpel2(idx, 1, 1, hv, type); \
+ init_subpel2(idx, 0, 1, v, type); \
+ init_subpel2(idx, 1, 0, h, type)
+
+ init_subpel3(0, put);
+ init_subpel3(1, avg);
+
+#undef init_subpel1
+#undef init_subpel2
+#undef init_subpel3
+#undef init_subpel1_bd_aware
+}
+
+/* Scaled (reference of a different size) 8-tap MC. mx/my are 4-bit
+ * subpel phases and dx/dy the per-output-pixel phase increments; the
+ * integer carry of the phase (>> 4) advances the source position.
+ * Horizontal pass fills a 64-wide temporary with enough rows for the
+ * vertical taps (3 above, 4 below), then the vertical pass walks it. */
+static av_always_inline void do_scaled_8tap_c(uint8_t *_dst, ptrdiff_t dst_stride,
+ const uint8_t *_src, ptrdiff_t src_stride,
+ int w, int h, int mx, int my,
+ int dx, int dy, int avg,
+ const int16_t (*filters)[8])
+{
+ int tmp_h = (((h - 1) * dy + my) >> 4) + 8;
+ pixel tmp[64 * 135], *tmp_ptr = tmp;
+ pixel *dst = (pixel *) _dst;
+ const pixel *src = (const pixel *) _src;
+
+ dst_stride /= sizeof(pixel);
+ src_stride /= sizeof(pixel);
+ src -= src_stride * 3;
+ do {
+ int x;
+ int imx = mx, ioff = 0;
+
+ for (x = 0; x < w; x++) {
+ tmp_ptr[x] = FILTER_8TAP(src, ioff, filters[imx], 1);
+ imx += dx;
+ ioff += imx >> 4;
+ imx &= 0xf;
+ }
+
+ tmp_ptr += 64;
+ src += src_stride;
+ } while (--tmp_h);
+
+ tmp_ptr = tmp + 64 * 3;
+ do {
+ int x;
+ const int16_t *filter = filters[my];
+
+ for (x = 0; x < w; x++)
+ if (avg) {
+ dst[x] = (dst[x] + FILTER_8TAP(tmp_ptr, x, filter, 64) + 1) >> 1;
+ } else {
+ dst[x] = FILTER_8TAP(tmp_ptr, x, filter, 64);
+ }
+
+ my += dy;
+ tmp_ptr += (my >> 4) * 64;
+ my &= 0xf;
+ dst += dst_stride;
+ } while (--h);
+}
+
+/* put/avg entry points for scaled 8-tap MC. */
+#define scaled_filter_8tap_fn(opn, opa) \
+static av_noinline void opn##_scaled_8tap_c(uint8_t *dst, ptrdiff_t dst_stride, \
+ const uint8_t *src, ptrdiff_t src_stride, \
+ int w, int h, int mx, int my, int dx, int dy, \
+ const int16_t (*filters)[8]) \
+{ \
+ do_scaled_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \
+ opa, filters); \
+}
+
+scaled_filter_8tap_fn(put, 0)
+scaled_filter_8tap_fn(avg, 1)
+
+#undef scaled_filter_8tap_fn
+
+#undef FILTER_8TAP
+
+/* Fixed-size smc[] entry points binding a filter-type table to the
+ * generic scaled 8-tap implementation. */
+#define scaled_filter_fn(sz, type, type_idx, avg) \
+static void avg##_scaled_##type##_##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \
+ const uint8_t *src, ptrdiff_t src_stride, \
+ int h, int mx, int my, int dx, int dy) \
+{ \
+ avg##_scaled_8tap_c(dst, dst_stride, src, src_stride, sz, h, mx, my, dx, dy, \
+ vp9_subpel_filters[type_idx]); \
+}
+
+/* Scaled bilinear MC; like the unscaled bilinear code, shared between
+ * the 10- and 12-bit builds, so only instantiated when BIT_DEPTH != 12. */
+#if BIT_DEPTH != 12
+
+/* Horizontal pass into a 64-wide temporary (one extra row for the
+ * vertical interpolation), then vertical pass; mx/my are 4-bit phases
+ * stepped by dx/dy with the integer carry advancing the position. */
+static av_always_inline void do_scaled_bilin_c(uint8_t *_dst, ptrdiff_t dst_stride,
+ const uint8_t *_src, ptrdiff_t src_stride,
+ int w, int h, int mx, int my,
+ int dx, int dy, int avg)
+{
+ pixel tmp[64 * 129], *tmp_ptr = tmp;
+ int tmp_h = (((h - 1) * dy + my) >> 4) + 2;
+ pixel *dst = (pixel *) _dst;
+ const pixel *src = (const pixel *) _src;
+
+ dst_stride /= sizeof(pixel);
+ src_stride /= sizeof(pixel);
+ do {
+ int x;
+ int imx = mx, ioff = 0;
+
+ for (x = 0; x < w; x++) {
+ tmp_ptr[x] = FILTER_BILIN(src, ioff, imx, 1);
+ imx += dx;
+ ioff += imx >> 4;
+ imx &= 0xf;
+ }
+
+ tmp_ptr += 64;
+ src += src_stride;
+ } while (--tmp_h);
+
+ tmp_ptr = tmp;
+ do {
+ int x;
+
+ for (x = 0; x < w; x++)
+ if (avg) {
+ dst[x] = (dst[x] + FILTER_BILIN(tmp_ptr, x, my, 64) + 1) >> 1;
+ } else {
+ dst[x] = FILTER_BILIN(tmp_ptr, x, my, 64);
+ }
+
+ my += dy;
+ tmp_ptr += (my >> 4) * 64;
+ my &= 0xf;
+ dst += dst_stride;
+ } while (--h);
+}
+
+/* put/avg entry points for scaled bilinear MC. */
+#define scaled_bilin_fn(opn, opa) \
+static av_noinline void opn##_scaled_bilin_c(uint8_t *dst, ptrdiff_t dst_stride, \
+ const uint8_t *src, ptrdiff_t src_stride, \
+ int w, int h, int mx, int my, int dx, int dy) \
+{ \
+ do_scaled_bilin_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, opa); \
+}
+
+scaled_bilin_fn(put, 0)
+scaled_bilin_fn(avg, 1)
+
+#undef scaled_bilin_fn
+
+#undef FILTER_BILIN
+
+/* Fixed-size smc[] entry points for scaled bilinear MC. */
+#define scaled_bilinf_fn(sz, avg) \
+static void avg##_scaled_bilin_##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \
+ const uint8_t *src, ptrdiff_t src_stride, \
+ int h, int mx, int my, int dx, int dy) \
+{ \
+ avg##_scaled_bilin_c(dst, dst_stride, src, src_stride, sz, h, mx, my, dx, dy); \
+}
+
+#else
+
+/* 12-bit build: expands to nothing, the 10-bit version is reused. */
+#define scaled_bilinf_fn(a, b)
+
+#endif
+
+/* Instantiate all scaled MC entry points: per block size and op, one
+ * function per 8-tap filter type plus (where present) bilinear. */
+#define scaled_filter_fns(sz, avg) \
+scaled_filter_fn(sz, regular, FILTER_8TAP_REGULAR, avg) \
+scaled_filter_fn(sz, smooth, FILTER_8TAP_SMOOTH, avg) \
+scaled_filter_fn(sz, sharp, FILTER_8TAP_SHARP, avg) \
+scaled_bilinf_fn(sz, avg)
+
+#define scaled_filter_fn_set(avg) \
+scaled_filter_fns(64, avg) \
+scaled_filter_fns(32, avg) \
+scaled_filter_fns(16, avg) \
+scaled_filter_fns(8, avg) \
+scaled_filter_fns(4, avg)
+
+scaled_filter_fn_set(put)
+scaled_filter_fn_set(avg)
+
+#undef scaled_filter_fns
+#undef scaled_filter_fn_set
+#undef scaled_filter_fn
+#undef scaled_bilinf_fn
+
+/* As with mc_init: the 12-bit build borrows its bilinear scaled MC from
+ * the 10-bit instantiation, so that initializer stays externally visible. */
+#if BIT_DEPTH != 8
+void ff_vp9dsp_scaled_mc_init_10(VP9DSPContext *dsp);
+#endif
+#if BIT_DEPTH != 10
+static
+#endif
+/* Fill dsp->smc[size][filter][op] with the scaled MC functions. */
+av_cold void FUNC(ff_vp9dsp_scaled_mc_init)(VP9DSPContext *dsp)
+{
+#define init_scaled_bd_aware(idx1, idx2, sz, type) \
+ dsp->smc[idx1][FILTER_8TAP_SMOOTH ][idx2] = type##_scaled_smooth_##sz##_c; \
+ dsp->smc[idx1][FILTER_8TAP_REGULAR][idx2] = type##_scaled_regular_##sz##_c; \
+ dsp->smc[idx1][FILTER_8TAP_SHARP ][idx2] = type##_scaled_sharp_##sz##_c
+
+#if BIT_DEPTH == 12
+ /* Bilinear scaled MC is shared with the 10-bit build. */
+ ff_vp9dsp_scaled_mc_init_10(dsp);
+#define init_scaled(a,b,c,d) init_scaled_bd_aware(a,b,c,d)
+#else
+#define init_scaled(idx1, idx2, sz, type) \
+ init_scaled_bd_aware(idx1, idx2, sz, type); \
+ dsp->smc[idx1][FILTER_BILINEAR ][idx2] = type##_scaled_bilin_##sz##_c
+#endif
+
+#define init_scaled_put_avg(idx, sz) \
+ init_scaled(idx, 0, sz, put); \
+ init_scaled(idx, 1, sz, avg)
+
+ init_scaled_put_avg(0, 64);
+ init_scaled_put_avg(1, 32);
+ init_scaled_put_avg(2, 16);
+ init_scaled_put_avg(3, 8);
+ init_scaled_put_avg(4, 4);
+
+#undef init_scaled_put_avg
+#undef init_scaled
+#undef init_scaled_bd_aware
+}
+
+/* Top-level initializer for this bit depth: populate every C function
+ * table (intra prediction, inverse transforms, loop filter, MC, scaled
+ * MC) in the VP9DSPContext. */
+av_cold void FUNC(ff_vp9dsp_init)(VP9DSPContext *dsp)
+{
+ FUNC(ff_vp9dsp_intrapred_init)(dsp);
+ vp9dsp_itxfm_init(dsp);
+ vp9dsp_loopfilter_init(dsp);
+ FUNC(ff_vp9dsp_mc_init)(dsp);
+ FUNC(ff_vp9dsp_scaled_mc_init)(dsp);
+}
diff --git a/media/ffvpx/libavcodec/x86/constants.c b/media/ffvpx/libavcodec/x86/constants.c
new file mode 100644
index 000000000..11002ee61
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/constants.c
@@ -0,0 +1,93 @@
+/*
+ * MMX/SSE/AVX constants used across x86 dsp optimizations.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/mem.h"
+#include "libavutil/x86/asm.h" // for xmm_reg
+#include "constants.h"
+
+/* pw_*: 16-bit word constants, value replicated into every word lane.
+ * ymm_reg entries span 32 bytes, xmm_reg 16, uint64_t 8 (MMX-era). */
+DECLARE_ALIGNED(32, const ymm_reg, ff_pw_1) = { 0x0001000100010001ULL, 0x0001000100010001ULL,
+ 0x0001000100010001ULL, 0x0001000100010001ULL };
+DECLARE_ALIGNED(32, const ymm_reg, ff_pw_2) = { 0x0002000200020002ULL, 0x0002000200020002ULL,
+ 0x0002000200020002ULL, 0x0002000200020002ULL };
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_3) = { 0x0003000300030003ULL, 0x0003000300030003ULL };
+DECLARE_ALIGNED(32, const ymm_reg, ff_pw_4) = { 0x0004000400040004ULL, 0x0004000400040004ULL,
+ 0x0004000400040004ULL, 0x0004000400040004ULL };
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_5) = { 0x0005000500050005ULL, 0x0005000500050005ULL };
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_8) = { 0x0008000800080008ULL, 0x0008000800080008ULL };
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_9) = { 0x0009000900090009ULL, 0x0009000900090009ULL };
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_15) = 0x000F000F000F000FULL;
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_16) = { 0x0010001000100010ULL, 0x0010001000100010ULL };
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_17) = { 0x0011001100110011ULL, 0x0011001100110011ULL };
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_18) = { 0x0012001200120012ULL, 0x0012001200120012ULL };
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_20) = { 0x0014001400140014ULL, 0x0014001400140014ULL };
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_32) = { 0x0020002000200020ULL, 0x0020002000200020ULL };
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_42) = 0x002A002A002A002AULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_53) = 0x0035003500350035ULL;
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_64) = { 0x0040004000400040ULL, 0x0040004000400040ULL };
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_96) = 0x0060006000600060ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL;
+DECLARE_ALIGNED(32, const ymm_reg, ff_pw_255) = { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL,
+ 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL };
+DECLARE_ALIGNED(32, const ymm_reg, ff_pw_256) = { 0x0100010001000100ULL, 0x0100010001000100ULL,
+ 0x0100010001000100ULL, 0x0100010001000100ULL };
+DECLARE_ALIGNED(32, const ymm_reg, ff_pw_512) = { 0x0200020002000200ULL, 0x0200020002000200ULL,
+ 0x0200020002000200ULL, 0x0200020002000200ULL };
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_1019) = { 0x03FB03FB03FB03FBULL, 0x03FB03FB03FB03FBULL };
+DECLARE_ALIGNED(32, const ymm_reg, ff_pw_1023) = { 0x03ff03ff03ff03ffULL, 0x03ff03ff03ff03ffULL,
+ 0x03ff03ff03ff03ffULL, 0x03ff03ff03ff03ffULL};
+DECLARE_ALIGNED(32, const ymm_reg, ff_pw_1024) = { 0x0400040004000400ULL, 0x0400040004000400ULL,
+ 0x0400040004000400ULL, 0x0400040004000400ULL};
+DECLARE_ALIGNED(32, const ymm_reg, ff_pw_2048) = { 0x0800080008000800ULL, 0x0800080008000800ULL,
+ 0x0800080008000800ULL, 0x0800080008000800ULL };
+DECLARE_ALIGNED(32, const ymm_reg, ff_pw_4095) = { 0x0fff0fff0fff0fffULL, 0x0fff0fff0fff0fffULL,
+ 0x0fff0fff0fff0fffULL, 0x0fff0fff0fff0fffULL };
+DECLARE_ALIGNED(32, const ymm_reg, ff_pw_4096) = { 0x1000100010001000ULL, 0x1000100010001000ULL,
+ 0x1000100010001000ULL, 0x1000100010001000ULL };
+DECLARE_ALIGNED(32, const ymm_reg, ff_pw_8192) = { 0x2000200020002000ULL, 0x2000200020002000ULL,
+ 0x2000200020002000ULL, 0x2000200020002000ULL };
+DECLARE_ALIGNED(32, const ymm_reg, ff_pw_m1) = { 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
+ 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL };
+
+/* pb_*: 8-bit byte constants, value replicated into every byte lane. */
+DECLARE_ALIGNED(32, const ymm_reg, ff_pb_0) = { 0x0000000000000000ULL, 0x0000000000000000ULL,
+ 0x0000000000000000ULL, 0x0000000000000000ULL };
+DECLARE_ALIGNED(32, const ymm_reg, ff_pb_1) = { 0x0101010101010101ULL, 0x0101010101010101ULL,
+ 0x0101010101010101ULL, 0x0101010101010101ULL };
+DECLARE_ALIGNED(32, const ymm_reg, ff_pb_2) = { 0x0202020202020202ULL, 0x0202020202020202ULL,
+ 0x0202020202020202ULL, 0x0202020202020202ULL };
+DECLARE_ALIGNED(32, const ymm_reg, ff_pb_3) = { 0x0303030303030303ULL, 0x0303030303030303ULL,
+ 0x0303030303030303ULL, 0x0303030303030303ULL };
+/* NOTE(review): 16-byte xmm_reg declared with 32-byte alignment. */
+DECLARE_ALIGNED(32, const xmm_reg, ff_pb_15) = { 0x0F0F0F0F0F0F0F0FULL, 0x0F0F0F0F0F0F0F0FULL };
+DECLARE_ALIGNED(16, const xmm_reg, ff_pb_80) = { 0x8080808080808080ULL, 0x8080808080808080ULL };
+DECLARE_ALIGNED(32, const ymm_reg, ff_pb_FE) = { 0xFEFEFEFEFEFEFEFEULL, 0xFEFEFEFEFEFEFEFEULL,
+ 0xFEFEFEFEFEFEFEFEULL, 0xFEFEFEFEFEFEFEFEULL };
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_FC) = 0xFCFCFCFCFCFCFCFCULL;
+
+/* Sign-bit mask for packed single-precision floats. */
+DECLARE_ALIGNED(16, const xmm_reg, ff_ps_neg) = { 0x8000000080000000ULL, 0x8000000080000000ULL };
+
+/* pd_*: 32-bit dword constants, value replicated into every dword lane. */
+DECLARE_ALIGNED(32, const ymm_reg, ff_pd_1) = { 0x0000000100000001ULL, 0x0000000100000001ULL,
+ 0x0000000100000001ULL, 0x0000000100000001ULL };
+DECLARE_ALIGNED(32, const ymm_reg, ff_pd_16) = { 0x0000001000000010ULL, 0x0000001000000010ULL,
+ 0x0000001000000010ULL, 0x0000001000000010ULL };
+DECLARE_ALIGNED(32, const ymm_reg, ff_pd_32) = { 0x0000002000000020ULL, 0x0000002000000020ULL,
+ 0x0000002000000020ULL, 0x0000002000000020ULL };
+DECLARE_ALIGNED(32, const ymm_reg, ff_pd_8192) = { 0x0000200000002000ULL, 0x0000200000002000ULL,
+ 0x0000200000002000ULL, 0x0000200000002000ULL };
+DECLARE_ALIGNED(32, const ymm_reg, ff_pd_65535)= { 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL,
+ 0x0000ffff0000ffffULL, 0x0000ffff0000ffffULL };
diff --git a/media/ffvpx/libavcodec/x86/constants.h b/media/ffvpx/libavcodec/x86/constants.h
new file mode 100644
index 000000000..b82aef9a4
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/constants.h
@@ -0,0 +1,71 @@
+/*
+ * MMX/SSE constants used across x86 dsp optimizations.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_X86_CONSTANTS_H
+#define AVCODEC_X86_CONSTANTS_H
+
+#include <stdint.h>
+
+#include "libavutil/x86/asm.h"
+
+/* Word (16-bit lane) constants; definitions live in constants.c. */
+extern const ymm_reg ff_pw_1;
+extern const ymm_reg ff_pw_2;
+extern const xmm_reg ff_pw_3;
+extern const ymm_reg ff_pw_4;
+extern const xmm_reg ff_pw_5;
+extern const xmm_reg ff_pw_8;
+extern const xmm_reg ff_pw_9;
+extern const uint64_t ff_pw_15;
+extern const xmm_reg ff_pw_16;
+extern const xmm_reg ff_pw_17;
+extern const xmm_reg ff_pw_18;
+extern const xmm_reg ff_pw_20;
+extern const xmm_reg ff_pw_32;
+extern const uint64_t ff_pw_42;
+extern const uint64_t ff_pw_53;
+extern const xmm_reg ff_pw_64;
+extern const uint64_t ff_pw_96;
+extern const uint64_t ff_pw_128;
+extern const ymm_reg ff_pw_255;
+extern const ymm_reg ff_pw_256;
+extern const ymm_reg ff_pw_512;
+extern const xmm_reg ff_pw_1019;
+extern const ymm_reg ff_pw_1023;
+extern const ymm_reg ff_pw_1024;
+extern const ymm_reg ff_pw_2048;
+extern const ymm_reg ff_pw_4095;
+extern const ymm_reg ff_pw_4096;
+extern const ymm_reg ff_pw_8192;
+extern const ymm_reg ff_pw_m1;
+
+/* Byte (8-bit lane) constants. */
+extern const ymm_reg ff_pb_0;
+extern const ymm_reg ff_pb_1;
+extern const ymm_reg ff_pb_2;
+extern const ymm_reg ff_pb_3;
+extern const xmm_reg ff_pb_15;
+extern const xmm_reg ff_pb_80;
+extern const ymm_reg ff_pb_FE;
+extern const uint64_t ff_pb_FC;
+
+/* Packed-single sign-bit mask. */
+extern const xmm_reg ff_ps_neg;
+
+/* Dword (32-bit lane) constants. */
+extern const ymm_reg ff_pd_1;
+extern const ymm_reg ff_pd_16;
+extern const ymm_reg ff_pd_32;
+extern const ymm_reg ff_pd_8192;
+extern const ymm_reg ff_pd_65535;
+
+#endif /* AVCODEC_X86_CONSTANTS_H */
diff --git a/media/ffvpx/libavcodec/x86/flacdsp.asm b/media/ffvpx/libavcodec/x86/flacdsp.asm
new file mode 100644
index 000000000..713861152
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/flacdsp.asm
@@ -0,0 +1,313 @@
+;******************************************************************************
+;* FLAC DSP SIMD optimizations
+;*
+;* Copyright (C) 2014 Loren Merritt
+;* Copyright (C) 2014 James Almer
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION .text
+
+%macro PMACSDQL 5
+%if cpuflag(xop)
+ pmacsdql %1, %2, %3, %1
+%else
+ pmuldq %2, %3
+ paddq %1, %2
+%endif
+%endmacro
+
+%macro LPC_32 1
+INIT_XMM %1
+cglobal flac_lpc_32, 5,6,5, decoded, coeffs, pred_order, qlevel, len, j
+ sub lend, pred_orderd
+ jle .ret
+ lea decodedq, [decodedq+pred_orderq*4-8]
+ lea coeffsq, [coeffsq+pred_orderq*4]
+ neg pred_orderq
+ movd m4, qlevelm
+ALIGN 16
+.loop_sample:
+ movd m0, [decodedq+pred_orderq*4+8]
+ add decodedq, 8
+ movd m1, [coeffsq+pred_orderq*4]
+ pxor m2, m2
+ pxor m3, m3
+ lea jq, [pred_orderq+1]
+ test jq, jq
+ jz .end_order
+.loop_order:
+ PMACSDQL m2, m0, m1, m2, m0
+ movd m0, [decodedq+jq*4]
+ PMACSDQL m3, m1, m0, m3, m1
+ movd m1, [coeffsq+jq*4]
+ inc jq
+ jl .loop_order
+.end_order:
+ PMACSDQL m2, m0, m1, m2, m0
+ psrlq m2, m4
+ movd m0, [decodedq]
+ paddd m0, m2
+ movd [decodedq], m0
+ sub lend, 2
+ jl .ret
+ PMACSDQL m3, m1, m0, m3, m1
+ psrlq m3, m4
+ movd m1, [decodedq+4]
+ paddd m1, m3
+ movd [decodedq+4], m1
+ jg .loop_sample
+.ret:
+ REP_RET
+%endmacro
+
+%if HAVE_XOP_EXTERNAL
+LPC_32 xop
+%endif
+LPC_32 sse4
+
+;----------------------------------------------------------------------------------
+;void ff_flac_decorrelate_[lrm]s_16_sse2(uint8_t **out, int32_t **in, int channels,
+; int len, int shift);
+;----------------------------------------------------------------------------------
+%macro FLAC_DECORRELATE_16 3-4
+cglobal flac_decorrelate_%1_16, 2, 4, 4, out, in0, in1, len
+%if ARCH_X86_32
+ mov lend, lenm
+%endif
+ movd m3, r4m
+ shl lend, 2
+ mov in1q, [in0q + gprsize]
+ mov in0q, [in0q]
+ mov outq, [outq]
+ add in1q, lenq
+ add in0q, lenq
+ add outq, lenq
+ neg lenq
+
+align 16
+.loop:
+ mova m0, [in0q + lenq]
+ mova m1, [in1q + lenq]
+%ifidn %1, ms
+ psrad m2, m1, 1
+ psubd m0, m2
+%endif
+%ifnidn %1, indep2
+ p%4d m2, m0, m1
+%endif
+ packssdw m%2, m%2
+ packssdw m%3, m%3
+ punpcklwd m%2, m%3
+ psllw m%2, m3
+ mova [outq + lenq], m%2
+ add lenq, 16
+ jl .loop
+ REP_RET
+%endmacro
+
+INIT_XMM sse2
+FLAC_DECORRELATE_16 ls, 0, 2, sub
+FLAC_DECORRELATE_16 rs, 2, 1, add
+FLAC_DECORRELATE_16 ms, 2, 0, add
+
+;----------------------------------------------------------------------------------
+;void ff_flac_decorrelate_[lrm]s_32_sse2(uint8_t **out, int32_t **in, int channels,
+; int len, int shift);
+;----------------------------------------------------------------------------------
+%macro FLAC_DECORRELATE_32 5
+cglobal flac_decorrelate_%1_32, 2, 4, 4, out, in0, in1, len
+%if ARCH_X86_32
+ mov lend, lenm
+%endif
+ movd m3, r4m
+ mov in1q, [in0q + gprsize]
+ mov in0q, [in0q]
+ mov outq, [outq]
+ sub in1q, in0q
+
+align 16
+.loop:
+ mova m0, [in0q]
+ mova m1, [in0q + in1q]
+%ifidn %1, ms
+ psrad m2, m1, 1
+ psubd m0, m2
+%endif
+ p%5d m2, m0, m1
+ pslld m%2, m3
+ pslld m%3, m3
+
+ SBUTTERFLY dq, %2, %3, %4
+
+ mova [outq ], m%2
+ mova [outq + mmsize], m%3
+
+ add in0q, mmsize
+ add outq, mmsize*2
+ sub lend, mmsize/4
+ jg .loop
+ REP_RET
+%endmacro
+
+INIT_XMM sse2
+FLAC_DECORRELATE_32 ls, 0, 2, 1, sub
+FLAC_DECORRELATE_32 rs, 2, 1, 0, add
+FLAC_DECORRELATE_32 ms, 2, 0, 1, add
+
+;-----------------------------------------------------------------------------------------
+;void ff_flac_decorrelate_indep<ch>_<bps>_<opt>(uint8_t **out, int32_t **in, int channels,
+; int len, int shift);
+;-----------------------------------------------------------------------------------------
+;%1 = bps
+;%2 = channels
+;%3 = last xmm reg used
+;%4 = word/dword (shift instruction)
+%macro FLAC_DECORRELATE_INDEP 4
+%define REPCOUNT %2/(32/%1) ; 16bits = channels / 2; 32bits = channels
+cglobal flac_decorrelate_indep%2_%1, 2, %2+2, %3+1, out, in0, in1, len, in2, in3, in4, in5, in6, in7
+%if ARCH_X86_32
+%if %2 == 6
+ DEFINE_ARGS out, in0, in1, in2, in3, in4, in5
+ %define lend dword r3m
+%else
+ mov lend, lenm
+%endif
+%endif
+ movd m%3, r4m
+
+%assign %%i 1
+%rep %2-1
+ mov in %+ %%i %+ q, [in0q+%%i*gprsize]
+%assign %%i %%i+1
+%endrep
+
+ mov in0q, [in0q]
+ mov outq, [outq]
+
+%assign %%i 1
+%rep %2-1
+ sub in %+ %%i %+ q, in0q
+%assign %%i %%i+1
+%endrep
+
+align 16
+.loop:
+ mova m0, [in0q]
+
+%assign %%i 1
+%rep REPCOUNT-1
+ mova m %+ %%i, [in0q + in %+ %%i %+ q]
+%assign %%i %%i+1
+%endrep
+
+%if %1 == 32
+
+%if %2 == 8
+ TRANSPOSE8x4D 0, 1, 2, 3, 4, 5, 6, 7, 8
+%elif %2 == 6
+ SBUTTERFLY dq, 0, 1, 6
+ SBUTTERFLY dq, 2, 3, 6
+ SBUTTERFLY dq, 4, 5, 6
+
+ punpcklqdq m6, m0, m2
+ punpckhqdq m2, m4
+ shufps m4, m0, 0xe4
+ punpcklqdq m0, m1, m3
+ punpckhqdq m3, m5
+ shufps m5, m1, 0xe4
+ SWAP 0,6,1,4,5,3
+%elif %2 == 4
+ TRANSPOSE4x4D 0, 1, 2, 3, 4
+%else ; %2 == 2
+ SBUTTERFLY dq, 0, 1, 2
+%endif
+
+%else ; %1 == 16
+
+%if %2 == 8
+ packssdw m0, [in0q + in4q]
+ packssdw m1, [in0q + in5q]
+ packssdw m2, [in0q + in6q]
+ packssdw m3, [in0q + in7q]
+ TRANSPOSE2x4x4W 0, 1, 2, 3, 4
+%elif %2 == 6
+ packssdw m0, [in0q + in3q]
+ packssdw m1, [in0q + in4q]
+ packssdw m2, [in0q + in5q]
+ pshufd m3, m0, q1032
+ punpcklwd m0, m1
+ punpckhwd m1, m2
+ punpcklwd m2, m3
+
+ shufps m3, m0, m2, q2020
+ shufps m0, m1, q2031
+ shufps m2, m1, q3131
+ shufps m1, m2, m3, q3120
+ shufps m3, m0, q0220
+ shufps m0, m2, q3113
+ SWAP 2, 0, 3
+%else ; %2 == 4
+ packssdw m0, [in0q + in2q]
+ packssdw m1, [in0q + in3q]
+ SBUTTERFLY wd, 0, 1, 2
+ SBUTTERFLY dq, 0, 1, 2
+%endif
+
+%endif
+
+%assign %%i 0
+%rep REPCOUNT
+ psll%4 m %+ %%i, m%3
+%assign %%i %%i+1
+%endrep
+
+%assign %%i 0
+%rep REPCOUNT
+ mova [outq + %%i*mmsize], m %+ %%i
+%assign %%i %%i+1
+%endrep
+
+ add in0q, mmsize
+ add outq, mmsize*REPCOUNT
+ sub lend, mmsize/4
+ jg .loop
+ REP_RET
+%endmacro
+
+INIT_XMM sse2
+FLAC_DECORRELATE_16 indep2, 0, 1 ; Reuse stereo 16bits macro
+FLAC_DECORRELATE_INDEP 32, 2, 3, d
+FLAC_DECORRELATE_INDEP 16, 4, 3, w
+FLAC_DECORRELATE_INDEP 32, 4, 5, d
+FLAC_DECORRELATE_INDEP 16, 6, 4, w
+FLAC_DECORRELATE_INDEP 32, 6, 7, d
+%if ARCH_X86_64
+FLAC_DECORRELATE_INDEP 16, 8, 5, w
+FLAC_DECORRELATE_INDEP 32, 8, 9, d
+%endif
+
+INIT_XMM avx
+FLAC_DECORRELATE_INDEP 32, 4, 5, d
+FLAC_DECORRELATE_INDEP 32, 6, 7, d
+%if ARCH_X86_64
+FLAC_DECORRELATE_INDEP 16, 8, 5, w
+FLAC_DECORRELATE_INDEP 32, 8, 9, d
+%endif
diff --git a/media/ffvpx/libavcodec/x86/flacdsp_init.c b/media/ffvpx/libavcodec/x86/flacdsp_init.c
new file mode 100644
index 000000000..e28c5c932
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/flacdsp_init.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2014 James Almer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/flacdsp.h"
+#include "libavutil/x86/cpu.h"
+#include "config.h"
+
+void ff_flac_lpc_32_sse4(int32_t *samples, const int coeffs[32], int order,
+ int qlevel, int len);
+void ff_flac_lpc_32_xop(int32_t *samples, const int coeffs[32], int order,
+ int qlevel, int len);
+
+void ff_flac_enc_lpc_16_sse4(int32_t *, const int32_t *, int, int, const int32_t *,int);
+
+#define DECORRELATE_FUNCS(fmt, opt) \
+void ff_flac_decorrelate_ls_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
+ int len, int shift); \
+void ff_flac_decorrelate_rs_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
+ int len, int shift); \
+void ff_flac_decorrelate_ms_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
+ int len, int shift); \
+void ff_flac_decorrelate_indep2_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
+ int len, int shift); \
+void ff_flac_decorrelate_indep4_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
+ int len, int shift); \
+void ff_flac_decorrelate_indep6_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
+ int len, int shift); \
+void ff_flac_decorrelate_indep8_##fmt##_##opt(uint8_t **out, int32_t **in, int channels, \
+ int len, int shift)
+
+DECORRELATE_FUNCS(16, sse2);
+DECORRELATE_FUNCS(16, avx);
+DECORRELATE_FUNCS(32, sse2);
+DECORRELATE_FUNCS(32, avx);
+
+av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int channels,
+ int bps)
+{
+#if HAVE_YASM
+ int cpu_flags = av_get_cpu_flags();
+
+#if CONFIG_FLAC_DECODER
+ if (EXTERNAL_SSE2(cpu_flags)) {
+ if (fmt == AV_SAMPLE_FMT_S16) {
+ if (channels == 2)
+ c->decorrelate[0] = ff_flac_decorrelate_indep2_16_sse2;
+ else if (channels == 4)
+ c->decorrelate[0] = ff_flac_decorrelate_indep4_16_sse2;
+ else if (channels == 6)
+ c->decorrelate[0] = ff_flac_decorrelate_indep6_16_sse2;
+ else if (ARCH_X86_64 && channels == 8)
+ c->decorrelate[0] = ff_flac_decorrelate_indep8_16_sse2;
+ c->decorrelate[1] = ff_flac_decorrelate_ls_16_sse2;
+ c->decorrelate[2] = ff_flac_decorrelate_rs_16_sse2;
+ c->decorrelate[3] = ff_flac_decorrelate_ms_16_sse2;
+ } else if (fmt == AV_SAMPLE_FMT_S32) {
+ if (channels == 2)
+ c->decorrelate[0] = ff_flac_decorrelate_indep2_32_sse2;
+ else if (channels == 4)
+ c->decorrelate[0] = ff_flac_decorrelate_indep4_32_sse2;
+ else if (channels == 6)
+ c->decorrelate[0] = ff_flac_decorrelate_indep6_32_sse2;
+ else if (ARCH_X86_64 && channels == 8)
+ c->decorrelate[0] = ff_flac_decorrelate_indep8_32_sse2;
+ c->decorrelate[1] = ff_flac_decorrelate_ls_32_sse2;
+ c->decorrelate[2] = ff_flac_decorrelate_rs_32_sse2;
+ c->decorrelate[3] = ff_flac_decorrelate_ms_32_sse2;
+ }
+ }
+ if (EXTERNAL_SSE4(cpu_flags)) {
+ c->lpc32 = ff_flac_lpc_32_sse4;
+ }
+ if (EXTERNAL_AVX(cpu_flags)) {
+ if (fmt == AV_SAMPLE_FMT_S16) {
+ if (ARCH_X86_64 && channels == 8)
+ c->decorrelate[0] = ff_flac_decorrelate_indep8_16_avx;
+ } else if (fmt == AV_SAMPLE_FMT_S32) {
+ if (channels == 4)
+ c->decorrelate[0] = ff_flac_decorrelate_indep4_32_avx;
+ else if (channels == 6)
+ c->decorrelate[0] = ff_flac_decorrelate_indep6_32_avx;
+ else if (ARCH_X86_64 && channels == 8)
+ c->decorrelate[0] = ff_flac_decorrelate_indep8_32_avx;
+ }
+ }
+ if (EXTERNAL_XOP(cpu_flags)) {
+ c->lpc32 = ff_flac_lpc_32_xop;
+ }
+#endif
+
+#if CONFIG_FLAC_ENCODER
+ if (EXTERNAL_SSE4(cpu_flags)) {
+ if (CONFIG_GPL)
+ c->lpc16_encode = ff_flac_enc_lpc_16_sse4;
+ }
+#endif
+#endif /* HAVE_YASM */
+}
diff --git a/media/ffvpx/libavcodec/x86/h264_i386.h b/media/ffvpx/libavcodec/x86/h264_i386.h
new file mode 100644
index 000000000..4dfbc3093
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/h264_i386.h
@@ -0,0 +1,212 @@
+/*
+ * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
+ * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * H.264 / AVC / MPEG-4 part10 codec.
+ * non-MMX i386-specific optimizations for H.264
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#ifndef AVCODEC_X86_H264_I386_H
+#define AVCODEC_X86_H264_I386_H
+
+#include <stddef.h>
+
+#include "libavcodec/cabac.h"
+#include "cabac.h"
+
+#if HAVE_INLINE_ASM
+
+#if ARCH_X86_64
+#define REG64 "r"
+#else
+#define REG64 "m"
+#endif
+
+//FIXME use some macros to avoid duplicating get_cabac (cannot be done yet
+//as that would make optimization work hard)
+#if HAVE_7REGS && !BROKEN_COMPILER
+#define decode_significance decode_significance_x86
+static int decode_significance_x86(CABACContext *c, int max_coeff,
+ uint8_t *significant_coeff_ctx_base,
+ int *index, x86_reg last_off){
+ void *end= significant_coeff_ctx_base + max_coeff - 1;
+ int minusstart= -(intptr_t)significant_coeff_ctx_base;
+ int minusindex= 4-(intptr_t)index;
+ int bit;
+ x86_reg coeff_count;
+
+#ifdef BROKEN_RELOCATIONS
+ void *tables;
+
+ __asm__ volatile(
+ "lea "MANGLE(ff_h264_cabac_tables)", %0 \n\t"
+ : "=&r"(tables)
+ : NAMED_CONSTRAINTS_ARRAY(ff_h264_cabac_tables)
+ );
+#endif
+
+ __asm__ volatile(
+ "3: \n\t"
+
+ BRANCHLESS_GET_CABAC("%4", "%q4", "(%1)", "%3", "%w3",
+ "%5", "%q5", "%k0", "%b0",
+ "%c11(%6)", "%c12(%6)",
+ AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
+ AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
+ AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
+ "%13")
+
+ "test $1, %4 \n\t"
+ " jz 4f \n\t"
+ "add %10, %1 \n\t"
+
+ BRANCHLESS_GET_CABAC("%4", "%q4", "(%1)", "%3", "%w3",
+ "%5", "%q5", "%k0", "%b0",
+ "%c11(%6)", "%c12(%6)",
+ AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
+ AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
+ AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
+ "%13")
+
+ "sub %10, %1 \n\t"
+ "mov %2, %0 \n\t"
+ "movl %7, %%ecx \n\t"
+ "add %1, %%"REG_c" \n\t"
+ "movl %%ecx, (%0) \n\t"
+
+ "test $1, %4 \n\t"
+ " jnz 5f \n\t"
+
+ "add"OPSIZE" $4, %2 \n\t"
+
+ "4: \n\t"
+ "add $1, %1 \n\t"
+ "cmp %8, %1 \n\t"
+ " jb 3b \n\t"
+ "mov %2, %0 \n\t"
+ "movl %7, %%ecx \n\t"
+ "add %1, %%"REG_c" \n\t"
+ "movl %%ecx, (%0) \n\t"
+ "5: \n\t"
+ "add %9, %k0 \n\t"
+ "shr $2, %k0 \n\t"
+ : "=&q"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index),
+ "+&r"(c->low), "=&r"(bit), "+&r"(c->range)
+ : "r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off),
+ "i"(offsetof(CABACContext, bytestream)),
+ "i"(offsetof(CABACContext, bytestream_end))
+ TABLES_ARG
+ : "%"REG_c, "memory"
+ );
+ return coeff_count;
+}
+
+#define decode_significance_8x8 decode_significance_8x8_x86
+static int decode_significance_8x8_x86(CABACContext *c,
+ uint8_t *significant_coeff_ctx_base,
+ int *index, uint8_t *last_coeff_ctx_base, const uint8_t *sig_off){
+ int minusindex= 4-(intptr_t)index;
+ int bit;
+ x86_reg coeff_count;
+ x86_reg last=0;
+ x86_reg state;
+
+#ifdef BROKEN_RELOCATIONS
+ void *tables;
+
+ __asm__ volatile(
+ "lea "MANGLE(ff_h264_cabac_tables)", %0 \n\t"
+ : "=&r"(tables)
+ : NAMED_CONSTRAINTS_ARRAY(ff_h264_cabac_tables)
+ );
+#endif
+
+ __asm__ volatile(
+ "mov %1, %6 \n\t"
+ "3: \n\t"
+
+ "mov %10, %0 \n\t"
+ "movzb (%0, %6), %6 \n\t"
+ "add %9, %6 \n\t"
+
+ BRANCHLESS_GET_CABAC("%4", "%q4", "(%6)", "%3", "%w3",
+ "%5", "%q5", "%k0", "%b0",
+ "%c12(%7)", "%c13(%7)",
+ AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
+ AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
+ AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
+ "%15")
+
+ "mov %1, %6 \n\t"
+ "test $1, %4 \n\t"
+ " jz 4f \n\t"
+
+#ifdef BROKEN_RELOCATIONS
+ "movzb %c14(%15, %q6), %6\n\t"
+#else
+ "movzb "MANGLE(ff_h264_cabac_tables)"+%c14(%6), %6\n\t"
+#endif
+ "add %11, %6 \n\t"
+
+ BRANCHLESS_GET_CABAC("%4", "%q4", "(%6)", "%3", "%w3",
+ "%5", "%q5", "%k0", "%b0",
+ "%c12(%7)", "%c13(%7)",
+ AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
+ AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
+ AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
+ "%15")
+
+ "mov %2, %0 \n\t"
+ "mov %1, %6 \n\t"
+ "mov %k6, (%0) \n\t"
+
+ "test $1, %4 \n\t"
+ " jnz 5f \n\t"
+
+ "add"OPSIZE" $4, %2 \n\t"
+
+ "4: \n\t"
+ "add $1, %6 \n\t"
+ "mov %6, %1 \n\t"
+ "cmp $63, %6 \n\t"
+ " jb 3b \n\t"
+ "mov %2, %0 \n\t"
+ "mov %k6, (%0) \n\t"
+ "5: \n\t"
+ "addl %8, %k0 \n\t"
+ "shr $2, %k0 \n\t"
+ : "=&q"(coeff_count), "+"REG64(last), "+"REG64(index), "+&r"(c->low),
+ "=&r"(bit), "+&r"(c->range), "=&r"(state)
+ : "r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base),
+ REG64(sig_off), REG64(last_coeff_ctx_base),
+ "i"(offsetof(CABACContext, bytestream)),
+ "i"(offsetof(CABACContext, bytestream_end)),
+ "i"(H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET) TABLES_ARG
+ : "%"REG_c, "memory"
+ );
+ return coeff_count;
+}
+#endif /* HAVE_7REGS && BROKEN_COMPILER */
+
+#endif /* HAVE_INLINE_ASM */
+#endif /* AVCODEC_X86_H264_I386_H */
diff --git a/media/ffvpx/libavcodec/x86/h264_intrapred.asm b/media/ffvpx/libavcodec/x86/h264_intrapred.asm
new file mode 100644
index 000000000..c88d91b49
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/h264_intrapred.asm
@@ -0,0 +1,2717 @@
+;******************************************************************************
+;* H.264 intra prediction asm optimizations
+;* Copyright (c) 2010 Fiona Glaser
+;* Copyright (c) 2010 Holger Lubitz
+;* Copyright (c) 2010 Loren Merritt
+;* Copyright (c) 2010 Ronald S. Bultje
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+tm_shuf: times 8 db 0x03, 0x80
+pw_ff00: times 8 dw 0xff00
+plane_shuf: db -8, -7, -6, -5, -4, -3, -2, -1
+ db 1, 2, 3, 4, 5, 6, 7, 8
+plane8_shuf: db -4, -3, -2, -1, 0, 0, 0, 0
+ db 1, 2, 3, 4, 0, 0, 0, 0
+pw_0to7: dw 0, 1, 2, 3, 4, 5, 6, 7
+pw_1to8: dw 1, 2, 3, 4, 5, 6, 7, 8
+pw_m8tom1: dw -8, -7, -6, -5, -4, -3, -2, -1
+pw_m4to4: dw -4, -3, -2, -1, 1, 2, 3, 4
+
+SECTION .text
+
+cextern pb_1
+cextern pb_3
+cextern pw_4
+cextern pw_5
+cextern pw_8
+cextern pw_16
+cextern pw_17
+cextern pw_32
+
+;-----------------------------------------------------------------------------
+; void ff_pred16x16_vertical_8(uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+
+INIT_MMX mmx
+cglobal pred16x16_vertical_8, 2,3
+ sub r0, r1
+ mov r2, 8
+ movq mm0, [r0+0]
+ movq mm1, [r0+8]
+.loop:
+ movq [r0+r1*1+0], mm0
+ movq [r0+r1*1+8], mm1
+ movq [r0+r1*2+0], mm0
+ movq [r0+r1*2+8], mm1
+ lea r0, [r0+r1*2]
+ dec r2
+ jg .loop
+ REP_RET
+
+INIT_XMM sse
+cglobal pred16x16_vertical_8, 2,3
+ sub r0, r1
+ mov r2, 4
+ movaps xmm0, [r0]
+.loop:
+ movaps [r0+r1*1], xmm0
+ movaps [r0+r1*2], xmm0
+ lea r0, [r0+r1*2]
+ movaps [r0+r1*1], xmm0
+ movaps [r0+r1*2], xmm0
+ lea r0, [r0+r1*2]
+ dec r2
+ jg .loop
+ REP_RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred16x16_horizontal_8(uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+
+%macro PRED16x16_H 0
+cglobal pred16x16_horizontal_8, 2,3
+ mov r2, 8
+%if cpuflag(ssse3)
+ mova m2, [pb_3]
+%endif
+.loop:
+ movd m0, [r0+r1*0-4]
+ movd m1, [r0+r1*1-4]
+
+%if cpuflag(ssse3)
+ pshufb m0, m2
+ pshufb m1, m2
+%else
+ punpcklbw m0, m0
+ punpcklbw m1, m1
+ SPLATW m0, m0, 3
+ SPLATW m1, m1, 3
+ mova [r0+r1*0+8], m0
+ mova [r0+r1*1+8], m1
+%endif
+
+ mova [r0+r1*0], m0
+ mova [r0+r1*1], m1
+ lea r0, [r0+r1*2]
+ dec r2
+ jg .loop
+ REP_RET
+%endmacro
+
+INIT_MMX mmx
+PRED16x16_H
+INIT_MMX mmxext
+PRED16x16_H
+INIT_XMM ssse3
+PRED16x16_H
+
+;-----------------------------------------------------------------------------
+; void ff_pred16x16_dc_8(uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+
+%macro PRED16x16_DC 0
+cglobal pred16x16_dc_8, 2,7
+ mov r4, r0
+ sub r0, r1
+ pxor mm0, mm0
+ pxor mm1, mm1
+ psadbw mm0, [r0+0]
+ psadbw mm1, [r0+8]
+ dec r0
+ movzx r5d, byte [r0+r1*1]
+ paddw mm0, mm1
+ movd r6d, mm0
+ lea r0, [r0+r1*2]
+%rep 7
+ movzx r2d, byte [r0+r1*0]
+ movzx r3d, byte [r0+r1*1]
+ add r5d, r2d
+ add r6d, r3d
+ lea r0, [r0+r1*2]
+%endrep
+ movzx r2d, byte [r0+r1*0]
+ add r5d, r6d
+ lea r2d, [r2+r5+16]
+ shr r2d, 5
+%if cpuflag(ssse3)
+ pxor m1, m1
+%endif
+ SPLATB_REG m0, r2, m1
+
+%if mmsize==8
+ mov r3d, 8
+.loop:
+ mova [r4+r1*0+0], m0
+ mova [r4+r1*0+8], m0
+ mova [r4+r1*1+0], m0
+ mova [r4+r1*1+8], m0
+%else
+ mov r3d, 4
+.loop:
+ mova [r4+r1*0], m0
+ mova [r4+r1*1], m0
+ lea r4, [r4+r1*2]
+ mova [r4+r1*0], m0
+ mova [r4+r1*1], m0
+%endif
+ lea r4, [r4+r1*2]
+ dec r3d
+ jg .loop
+ REP_RET
+%endmacro
+
+INIT_MMX mmxext
+PRED16x16_DC
+INIT_XMM sse2
+PRED16x16_DC
+INIT_XMM ssse3
+PRED16x16_DC
+
+;-----------------------------------------------------------------------------
+; void ff_pred16x16_tm_vp8_8(uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+
+%macro PRED16x16_TM 0
+cglobal pred16x16_tm_vp8_8, 2,5
+ sub r0, r1
+ pxor mm7, mm7
+ movq mm0, [r0+0]
+ movq mm2, [r0+8]
+ movq mm1, mm0
+ movq mm3, mm2
+ punpcklbw mm0, mm7
+ punpckhbw mm1, mm7
+ punpcklbw mm2, mm7
+ punpckhbw mm3, mm7
+ movzx r3d, byte [r0-1]
+ mov r4d, 16
+.loop:
+ movzx r2d, byte [r0+r1-1]
+ sub r2d, r3d
+ movd mm4, r2d
+ SPLATW mm4, mm4, 0
+ movq mm5, mm4
+ movq mm6, mm4
+ movq mm7, mm4
+ paddw mm4, mm0
+ paddw mm5, mm1
+ paddw mm6, mm2
+ paddw mm7, mm3
+ packuswb mm4, mm5
+ packuswb mm6, mm7
+ movq [r0+r1+0], mm4
+ movq [r0+r1+8], mm6
+ add r0, r1
+ dec r4d
+ jg .loop
+ REP_RET
+%endmacro
+
+INIT_MMX mmx
+PRED16x16_TM
+INIT_MMX mmxext
+PRED16x16_TM
+
+INIT_XMM sse2
+cglobal pred16x16_tm_vp8_8, 2,6,6
+ sub r0, r1
+ pxor xmm2, xmm2
+ movdqa xmm0, [r0]
+ movdqa xmm1, xmm0
+ punpcklbw xmm0, xmm2
+ punpckhbw xmm1, xmm2
+ movzx r4d, byte [r0-1]
+ mov r5d, 8
+.loop:
+ movzx r2d, byte [r0+r1*1-1]
+ movzx r3d, byte [r0+r1*2-1]
+ sub r2d, r4d
+ sub r3d, r4d
+ movd xmm2, r2d
+ movd xmm4, r3d
+ pshuflw xmm2, xmm2, 0
+ pshuflw xmm4, xmm4, 0
+ punpcklqdq xmm2, xmm2
+ punpcklqdq xmm4, xmm4
+ movdqa xmm3, xmm2
+ movdqa xmm5, xmm4
+ paddw xmm2, xmm0
+ paddw xmm3, xmm1
+ paddw xmm4, xmm0
+ paddw xmm5, xmm1
+ packuswb xmm2, xmm3
+ packuswb xmm4, xmm5
+ movdqa [r0+r1*1], xmm2
+ movdqa [r0+r1*2], xmm4
+ lea r0, [r0+r1*2]
+ dec r5d
+ jg .loop
+ REP_RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred16x16_plane_*_8(uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+
+%macro H264_PRED16x16_PLANE 1
+cglobal pred16x16_plane_%1_8, 2,9,7
+ mov r2, r1 ; +stride
+ neg r1 ; -stride
+
+ movh m0, [r0+r1 -1]
+%if mmsize == 8
+ pxor m4, m4
+ movh m1, [r0+r1 +3 ]
+ movh m2, [r0+r1 +8 ]
+ movh m3, [r0+r1 +12]
+ punpcklbw m0, m4
+ punpcklbw m1, m4
+ punpcklbw m2, m4
+ punpcklbw m3, m4
+ pmullw m0, [pw_m8tom1 ]
+ pmullw m1, [pw_m8tom1+8]
+ pmullw m2, [pw_1to8 ]
+ pmullw m3, [pw_1to8 +8]
+ paddw m0, m2
+ paddw m1, m3
+%else ; mmsize == 16
+%if cpuflag(ssse3)
+ movhps m0, [r0+r1 +8]
+ pmaddubsw m0, [plane_shuf] ; H coefficients
+%else ; sse2
+ pxor m2, m2
+ movh m1, [r0+r1 +8]
+ punpcklbw m0, m2
+ punpcklbw m1, m2
+ pmullw m0, [pw_m8tom1]
+ pmullw m1, [pw_1to8]
+ paddw m0, m1
+%endif
+ movhlps m1, m0
+%endif
+ paddw m0, m1
+%if cpuflag(mmxext)
+ PSHUFLW m1, m0, 0xE
+%elif cpuflag(mmx)
+ mova m1, m0
+ psrlq m1, 32
+%endif
+ paddw m0, m1
+%if cpuflag(mmxext)
+ PSHUFLW m1, m0, 0x1
+%elif cpuflag(mmx)
+ mova m1, m0
+ psrlq m1, 16
+%endif
+ paddw m0, m1 ; sum of H coefficients
+
+ lea r4, [r0+r2*8-1]
+ lea r3, [r0+r2*4-1]
+ add r4, r2
+
+%if ARCH_X86_64
+%define e_reg r8
+%else
+%define e_reg r0
+%endif
+
+ movzx e_reg, byte [r3+r2*2 ]
+ movzx r5, byte [r4+r1 ]
+ sub r5, e_reg
+
+ movzx e_reg, byte [r3+r2 ]
+ movzx r6, byte [r4 ]
+ sub r6, e_reg
+ lea r5, [r5+r6*2]
+
+ movzx e_reg, byte [r3+r1 ]
+ movzx r6, byte [r4+r2*2 ]
+ sub r6, e_reg
+ lea r5, [r5+r6*4]
+
+ movzx e_reg, byte [r3 ]
+%if ARCH_X86_64
+ movzx r7, byte [r4+r2 ]
+ sub r7, e_reg
+%else
+ movzx r6, byte [r4+r2 ]
+ sub r6, e_reg
+ lea r5, [r5+r6*4]
+ sub r5, r6
+%endif
+
+ lea e_reg, [r3+r1*4]
+ lea r3, [r4+r2*4]
+
+ movzx r4, byte [e_reg+r2 ]
+ movzx r6, byte [r3 ]
+ sub r6, r4
+%if ARCH_X86_64
+ lea r6, [r7+r6*2]
+ lea r5, [r5+r6*2]
+ add r5, r6
+%else
+ lea r5, [r5+r6*4]
+ lea r5, [r5+r6*2]
+%endif
+
+ movzx r4, byte [e_reg ]
+%if ARCH_X86_64
+ movzx r7, byte [r3 +r2 ]
+ sub r7, r4
+ sub r5, r7
+%else
+ movzx r6, byte [r3 +r2 ]
+ sub r6, r4
+ lea r5, [r5+r6*8]
+ sub r5, r6
+%endif
+
+ movzx r4, byte [e_reg+r1 ]
+ movzx r6, byte [r3 +r2*2]
+ sub r6, r4
+%if ARCH_X86_64
+ add r6, r7
+%endif
+ lea r5, [r5+r6*8]
+
+ movzx r4, byte [e_reg+r2*2]
+ movzx r6, byte [r3 +r1 ]
+ sub r6, r4
+ lea r5, [r5+r6*4]
+ add r5, r6 ; sum of V coefficients
+
+%if ARCH_X86_64 == 0
+ mov r0, r0m
+%endif
+
+%ifidn %1, h264
+ lea r5, [r5*5+32]
+ sar r5, 6
+%elifidn %1, rv40
+ lea r5, [r5*5]
+ sar r5, 6
+%elifidn %1, svq3
+ test r5, r5
+ lea r6, [r5+3]
+ cmovs r5, r6
+ sar r5, 2 ; V/4
+ lea r5, [r5*5] ; 5*(V/4)
+ test r5, r5
+ lea r6, [r5+15]
+ cmovs r5, r6
+ sar r5, 4 ; (5*(V/4))/16
+%endif
+
+ movzx r4, byte [r0+r1 +15]
+ movzx r3, byte [r3+r2*2 ]
+ lea r3, [r3+r4+1]
+ shl r3, 4
+
+ movd r1d, m0
+ movsx r1d, r1w
+%ifnidn %1, svq3
+%ifidn %1, h264
+ lea r1d, [r1d*5+32]
+%else ; rv40
+ lea r1d, [r1d*5]
+%endif
+ sar r1d, 6
+%else ; svq3
+ test r1d, r1d
+ lea r4d, [r1d+3]
+ cmovs r1d, r4d
+ sar r1d, 2 ; H/4
+ lea r1d, [r1d*5] ; 5*(H/4)
+ test r1d, r1d
+ lea r4d, [r1d+15]
+ cmovs r1d, r4d
+ sar r1d, 4 ; (5*(H/4))/16
+%endif
+ movd m0, r1d
+
+ add r1d, r5d
+ add r3d, r1d
+ shl r1d, 3
+ sub r3d, r1d ; a
+
+ movd m1, r5d
+ movd m3, r3d
+ SPLATW m0, m0, 0 ; H
+ SPLATW m1, m1, 0 ; V
+ SPLATW m3, m3, 0 ; a
+%ifidn %1, svq3
+ SWAP 0, 1
+%endif
+ mova m2, m0
+%if mmsize == 8
+ mova m5, m0
+%endif
+ pmullw m0, [pw_0to7] ; 0*H, 1*H, ..., 7*H (words)
+%if mmsize == 16
+ psllw m2, 3
+%else
+ psllw m5, 3
+ psllw m2, 2
+ mova m6, m5
+ paddw m6, m2
+%endif
+ paddw m0, m3 ; a + {0,1,2,3,4,5,6,7}*H
+ paddw m2, m0 ; a + {8,9,10,11,12,13,14,15}*H
+%if mmsize == 8
+ paddw m5, m0 ; a + {8,9,10,11}*H
+ paddw m6, m0 ; a + {12,13,14,15}*H
+%endif
+
+ mov r4, 8
+.loop:
+ mova m3, m0 ; b[0..7]
+ mova m4, m2 ; b[8..15]
+ psraw m3, 5
+ psraw m4, 5
+ packuswb m3, m4
+ mova [r0], m3
+%if mmsize == 8
+ mova m3, m5 ; b[8..11]
+ mova m4, m6 ; b[12..15]
+ psraw m3, 5
+ psraw m4, 5
+ packuswb m3, m4
+ mova [r0+8], m3
+%endif
+ paddw m0, m1
+ paddw m2, m1
+%if mmsize == 8
+ paddw m5, m1
+ paddw m6, m1
+%endif
+
+ mova m3, m0 ; b[0..7]
+ mova m4, m2 ; b[8..15]
+ psraw m3, 5
+ psraw m4, 5
+ packuswb m3, m4
+ mova [r0+r2], m3
+%if mmsize == 8
+ mova m3, m5 ; b[8..11]
+ mova m4, m6 ; b[12..15]
+ psraw m3, 5
+ psraw m4, 5
+ packuswb m3, m4
+ mova [r0+r2+8], m3
+%endif
+ paddw m0, m1
+ paddw m2, m1
+%if mmsize == 8
+ paddw m5, m1
+ paddw m6, m1
+%endif
+
+ lea r0, [r0+r2*2]
+ dec r4
+ jg .loop
+ REP_RET
+%endmacro
+
+INIT_MMX mmx
+H264_PRED16x16_PLANE h264
+H264_PRED16x16_PLANE rv40
+H264_PRED16x16_PLANE svq3
+INIT_MMX mmxext
+H264_PRED16x16_PLANE h264
+H264_PRED16x16_PLANE rv40
+H264_PRED16x16_PLANE svq3
+INIT_XMM sse2
+H264_PRED16x16_PLANE h264
+H264_PRED16x16_PLANE rv40
+H264_PRED16x16_PLANE svq3
+INIT_XMM ssse3
+H264_PRED16x16_PLANE h264
+H264_PRED16x16_PLANE rv40
+H264_PRED16x16_PLANE svq3
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8_plane_8(uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+
+%macro H264_PRED8x8_PLANE 0
+cglobal pred8x8_plane_8, 2,9,7
+ mov r2, r1 ; +stride
+ neg r1 ; -stride
+
+ movd m0, [r0+r1 -1]
+%if mmsize == 8
+ pxor m2, m2
+ movh m1, [r0+r1 +4 ]
+ punpcklbw m0, m2
+ punpcklbw m1, m2
+ pmullw m0, [pw_m4to4]
+ pmullw m1, [pw_m4to4+8]
+%else ; mmsize == 16
+%if cpuflag(ssse3)
+ movhps m0, [r0+r1 +4] ; this reads 4 bytes more than necessary
+ pmaddubsw m0, [plane8_shuf] ; H coefficients
+%else ; sse2
+ pxor m2, m2
+ movd m1, [r0+r1 +4]
+ punpckldq m0, m1
+ punpcklbw m0, m2
+ pmullw m0, [pw_m4to4]
+%endif
+ movhlps m1, m0
+%endif
+ paddw m0, m1
+
+%if notcpuflag(ssse3)
+%if cpuflag(mmxext)
+ PSHUFLW m1, m0, 0xE
+%elif cpuflag(mmx)
+ mova m1, m0
+ psrlq m1, 32
+%endif
+ paddw m0, m1
+%endif ; !ssse3
+
+%if cpuflag(mmxext)
+ PSHUFLW m1, m0, 0x1
+%elif cpuflag(mmx)
+ mova m1, m0
+ psrlq m1, 16
+%endif
+ paddw m0, m1 ; sum of H coefficients
+
+ lea r4, [r0+r2*4-1]
+ lea r3, [r0 -1]
+ add r4, r2
+
+%if ARCH_X86_64
+%define e_reg r8
+%else
+%define e_reg r0
+%endif
+
+ movzx e_reg, byte [r3+r2*2 ]
+ movzx r5, byte [r4+r1 ]
+ sub r5, e_reg
+
+ movzx e_reg, byte [r3 ]
+%if ARCH_X86_64
+ movzx r7, byte [r4+r2 ]
+ sub r7, e_reg
+ sub r5, r7
+%else
+ movzx r6, byte [r4+r2 ]
+ sub r6, e_reg
+ lea r5, [r5+r6*4]
+ sub r5, r6
+%endif
+
+ movzx e_reg, byte [r3+r1 ]
+ movzx r6, byte [r4+r2*2 ]
+ sub r6, e_reg
+%if ARCH_X86_64
+ add r6, r7
+%endif
+ lea r5, [r5+r6*4]
+
+ movzx e_reg, byte [r3+r2 ]
+ movzx r6, byte [r4 ]
+ sub r6, e_reg
+ lea r6, [r5+r6*2]
+
+ lea r5, [r6*9+16]
+ lea r5, [r5+r6*8]
+ sar r5, 5
+
+%if ARCH_X86_64 == 0
+ mov r0, r0m
+%endif
+
+ movzx r3, byte [r4+r2*2 ]
+ movzx r4, byte [r0+r1 +7]
+ lea r3, [r3+r4+1]
+ shl r3, 4
+ movd r1d, m0
+ movsx r1d, r1w
+ imul r1d, 17
+ add r1d, 16
+ sar r1d, 5
+ movd m0, r1d
+ add r1d, r5d
+ sub r3d, r1d
+ add r1d, r1d
+ sub r3d, r1d ; a
+
+ movd m1, r5d
+ movd m3, r3d
+ SPLATW m0, m0, 0 ; H
+ SPLATW m1, m1, 0 ; V
+ SPLATW m3, m3, 0 ; a
+%if mmsize == 8
+ mova m2, m0
+%endif
+ pmullw m0, [pw_0to7] ; 0*H, 1*H, ..., 7*H (words)
+ paddw m0, m3 ; a + {0,1,2,3,4,5,6,7}*H
+%if mmsize == 8
+ psllw m2, 2
+ paddw m2, m0 ; a + {4,5,6,7}*H
+%endif
+
+ mov r4, 4
+ALIGN 16
+.loop:
+%if mmsize == 16
+ mova m3, m0 ; b[0..7]
+ paddw m0, m1
+ psraw m3, 5
+ mova m4, m0 ; V+b[0..7]
+ paddw m0, m1
+ psraw m4, 5
+ packuswb m3, m4
+ movh [r0], m3
+ movhps [r0+r2], m3
+%else ; mmsize == 8
+ mova m3, m0 ; b[0..3]
+ mova m4, m2 ; b[4..7]
+ paddw m0, m1
+ paddw m2, m1
+ psraw m3, 5
+ psraw m4, 5
+ mova m5, m0 ; V+b[0..3]
+ mova m6, m2 ; V+b[4..7]
+ paddw m0, m1
+ paddw m2, m1
+ psraw m5, 5
+ psraw m6, 5
+ packuswb m3, m4
+ packuswb m5, m6
+ mova [r0], m3
+ mova [r0+r2], m5
+%endif
+
+ lea r0, [r0+r2*2]
+ dec r4
+ jg .loop
+ REP_RET
+%endmacro
+
+INIT_MMX mmx
+H264_PRED8x8_PLANE
+INIT_MMX mmxext
+H264_PRED8x8_PLANE
+INIT_XMM sse2
+H264_PRED8x8_PLANE
+INIT_XMM ssse3
+H264_PRED8x8_PLANE
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8_vertical_8(uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+
+INIT_MMX mmx
+; Vertical 8x8 prediction: copy the 8 pixels of the row above the block
+; into all 8 rows. r0 = src, r1 = stride.
+cglobal pred8x8_vertical_8, 2,2
+ sub r0, r1
+; mm0 = the 8 top-neighbour pixels
+ movq mm0, [r0]
+%rep 3
+ movq [r0+r1*1], mm0
+ movq [r0+r1*2], mm0
+ lea r0, [r0+r1*2]
+%endrep
+; last two of the 8 destination rows
+ movq [r0+r1*1], mm0
+ movq [r0+r1*2], mm0
+ RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8_horizontal_8(uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+
+; Horizontal 8x8 prediction: broadcast each row's left neighbour (src[-1])
+; across that row. Two rows per loop iteration, 4 iterations.
+%macro PRED8x8_H 0
+cglobal pred8x8_horizontal_8, 2,3
+ mov r2, 4
+%if cpuflag(ssse3)
+; pb_3 is the shuffle mask consumed by SPLATB_LOAD on ssse3
+ mova m2, [pb_3]
+%endif
+.loop:
+ SPLATB_LOAD m0, r0+r1*0-1, m2
+ SPLATB_LOAD m1, r0+r1*1-1, m2
+ mova [r0+r1*0], m0
+ mova [r0+r1*1], m1
+ lea r0, [r0+r1*2]
+ dec r2
+ jg .loop
+ REP_RET
+%endmacro
+
+INIT_MMX mmx
+PRED8x8_H
+INIT_MMX mmxext
+PRED8x8_H
+INIT_MMX ssse3
+PRED8x8_H
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8_top_dc_8_mmxext(uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+INIT_MMX mmxext
+; Top-DC 8x8 prediction: average the left 4 and right 4 top-neighbour
+; pixels separately (dc0/dc1) and fill the corresponding 4-wide halves
+; of all 8 rows. Row-pointer leas are interleaved with the math to hide
+; latency.
+cglobal pred8x8_top_dc_8, 2,5
+ sub r0, r1
+ movq mm0, [r0]
+ pxor mm1, mm1
+ pxor mm2, mm2
+ lea r2, [r0+r1*2]
+; split the 8 top pixels into two 4-pixel groups (high half in mm1,
+; low half in mm0) and sum each group with psadbw against zero
+ punpckhbw mm1, mm0
+ punpcklbw mm0, mm2
+ psadbw mm1, mm2 ; s1
+ lea r3, [r2+r1*2]
+ psadbw mm0, mm2 ; s0
+; (sum >> 1), then pavgw against 0 adds the rounding: dc = (sum + 2) >> 2
+ psrlw mm1, 1
+ psrlw mm0, 1
+ pavgw mm1, mm2
+ lea r4, [r3+r1*2]
+ pavgw mm0, mm2
+ pshufw mm1, mm1, 0
+ pshufw mm0, mm0, 0 ; dc0 (w)
+ packuswb mm0, mm1 ; dc0,dc1 (b)
+ movq [r0+r1*1], mm0
+ movq [r0+r1*2], mm0
+ lea r0, [r3+r1*2]
+ movq [r2+r1*1], mm0
+ movq [r2+r1*2], mm0
+ movq [r3+r1*1], mm0
+ movq [r3+r1*2], mm0
+ movq [r0+r1*1], mm0
+ movq [r0+r1*2], mm0
+ RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8_dc_8_mmxext(uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+
+INIT_MMX mmxext
+; Full-DC 8x8 prediction: four 4x4 quadrants, each filled with the DC of
+; its adjacent top (s0/s1) and/or left (s2/s3) neighbour sums, as in the
+; H.264 chroma DC rule.
+cglobal pred8x8_dc_8, 2,5
+ sub r0, r1
+ pxor m7, m7
+; s0/s1 = sums of the left/right 4 top-neighbour pixels
+ movd m0, [r0+0]
+ movd m1, [r0+4]
+ psadbw m0, m7 ; s0
+ mov r4, r0
+ psadbw m1, m7 ; s1
+
+; accumulate the top 4 left-neighbour pixels into s2, scalar loads
+; (column at src[-1], walking two rows per lea)
+ movzx r2d, byte [r0+r1*1-1]
+ movzx r3d, byte [r0+r1*2-1]
+ lea r0, [r0+r1*2]
+ add r2d, r3d
+ movzx r3d, byte [r0+r1*1-1]
+ add r2d, r3d
+ movzx r3d, byte [r0+r1*2-1]
+ add r2d, r3d
+ lea r0, [r0+r1*2]
+ movd m2, r2d ; s2
+; same for the bottom 4 left-neighbour pixels -> s3
+ movzx r2d, byte [r0+r1*1-1]
+ movzx r3d, byte [r0+r1*2-1]
+ lea r0, [r0+r1*2]
+ add r2d, r3d
+ movzx r3d, byte [r0+r1*1-1]
+ add r2d, r3d
+ movzx r3d, byte [r0+r1*2-1]
+ add r2d, r3d
+ movd m3, r2d ; s3
+
+; combine the four sums into one register and form the per-quadrant DCs
+ punpcklwd m0, m1
+ mov r0, r4
+ punpcklwd m2, m3
+ punpckldq m0, m2 ; s0, s1, s2, s3
+ pshufw m3, m0, 11110110b ; s2, s1, s3, s3
+ lea r2, [r0+r1*2]
+ pshufw m0, m0, 01110100b ; s0, s1, s3, s1
+ paddw m0, m3
+ lea r3, [r2+r1*2]
+ psrlw m0, 2
+ pavgw m0, m7 ; s0+s2, s1, s3, s1+s3
+ lea r4, [r3+r1*2]
+; expand the four DC bytes so m0 covers the top half and m1 the bottom half
+ packuswb m0, m0
+ punpcklbw m0, m0
+ movq m1, m0
+ punpcklbw m0, m0
+ punpckhbw m1, m1
+ movq [r0+r1*1], m0
+ movq [r0+r1*2], m0
+ movq [r2+r1*1], m0
+ movq [r2+r1*2], m0
+ movq [r3+r1*1], m1
+ movq [r3+r1*2], m1
+ movq [r4+r1*1], m1
+ movq [r4+r1*2], m1
+ RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8_dc_rv40_8(uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+
+INIT_MMX mmxext
+; RV40-style DC 8x8 prediction: one DC value over all 8 top + 8 left
+; neighbours, dc = (sum + 8) >> 4, broadcast to the whole block.
+cglobal pred8x8_dc_rv40_8, 2,7
+ mov r4, r0
+ sub r0, r1
+; r6d = sum of the 8 top pixels (psadbw against zero)
+ pxor mm0, mm0
+ psadbw mm0, [r0]
+ dec r0
+ movzx r5d, byte [r0+r1*1]
+ movd r6d, mm0
+ lea r0, [r0+r1*2]
+; add the remaining 7 left-neighbour pixels, two per iteration,
+; split across r5d/r6d accumulators
+%rep 3
+ movzx r2d, byte [r0+r1*0]
+ movzx r3d, byte [r0+r1*1]
+ add r5d, r2d
+ add r6d, r3d
+ lea r0, [r0+r1*2]
+%endrep
+ movzx r2d, byte [r0+r1*0]
+ add r5d, r6d
+; dc = (top_sum + left_sum + 8) >> 4, broadcast to all 8 bytes
+ lea r2d, [r2+r5+8]
+ shr r2d, 4
+ movd mm0, r2d
+ punpcklbw mm0, mm0
+ pshufw mm0, mm0, 0
+ mov r3d, 4
+.loop:
+ movq [r4+r1*0], mm0
+ movq [r4+r1*1], mm0
+ lea r4, [r4+r1*2]
+ dec r3d
+ jg .loop
+ REP_RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8_tm_vp8_8(uint8_t *src, int stride)
+;-----------------------------------------------------------------------------
+
+; VP8 TrueMotion 8x8 prediction (MMX): each output pixel is
+; top[x] + left[y] - topleft, computed in 16-bit and saturated to bytes.
+%macro PRED8x8_TM 0
+cglobal pred8x8_tm_vp8_8, 2,6
+ sub r0, r1
+ pxor mm7, mm7
+; mm0/mm1 = top row widened to words (low/high half)
+ movq mm0, [r0]
+ movq mm1, mm0
+ punpcklbw mm0, mm7
+ punpckhbw mm1, mm7
+; r4d = top-left neighbour
+ movzx r4d, byte [r0-1]
+ mov r5d, 4
+.loop:
+; two rows per iteration: (left - topleft) broadcast, added to the top row
+ movzx r2d, byte [r0+r1*1-1]
+ movzx r3d, byte [r0+r1*2-1]
+ sub r2d, r4d
+ sub r3d, r4d
+ movd mm2, r2d
+ movd mm4, r3d
+ SPLATW mm2, mm2, 0
+ SPLATW mm4, mm4, 0
+ movq mm3, mm2
+ movq mm5, mm4
+ paddw mm2, mm0
+ paddw mm3, mm1
+ paddw mm4, mm0
+ paddw mm5, mm1
+ packuswb mm2, mm3
+ packuswb mm4, mm5
+ movq [r0+r1*1], mm2
+ movq [r0+r1*2], mm4
+ lea r0, [r0+r1*2]
+ dec r5d
+ jg .loop
+ REP_RET
+%endmacro
+
+INIT_MMX mmx
+PRED8x8_TM
+INIT_MMX mmxext
+PRED8x8_TM
+
+INIT_XMM sse2
+; SSE2 TrueMotion: the whole 8-pixel top row fits one xmm register as
+; words, so each iteration produces two full rows with one packuswb.
+cglobal pred8x8_tm_vp8_8, 2,6,4
+ sub r0, r1
+ pxor xmm1, xmm1
+ movq xmm0, [r0]
+ punpcklbw xmm0, xmm1
+ movzx r4d, byte [r0-1]
+ mov r5d, 4
+.loop:
+ movzx r2d, byte [r0+r1*1-1]
+ movzx r3d, byte [r0+r1*2-1]
+ sub r2d, r4d
+ sub r3d, r4d
+ movd xmm2, r2d
+ movd xmm3, r3d
+; broadcast (left - topleft) to all 8 word lanes
+ pshuflw xmm2, xmm2, 0
+ pshuflw xmm3, xmm3, 0
+ punpcklqdq xmm2, xmm2
+ punpcklqdq xmm3, xmm3
+ paddw xmm2, xmm0
+ paddw xmm3, xmm0
+ packuswb xmm2, xmm3
+ movq [r0+r1*1], xmm2
+ movhps [r0+r1*2], xmm2
+ lea r0, [r0+r1*2]
+ dec r5d
+ jg .loop
+ REP_RET
+
+INIT_XMM ssse3
+; SSSE3 TrueMotion: pshufb with the tm_shuf mask widens/broadcasts the
+; left-neighbour byte directly, avoiding the scalar loads of the sse2
+; version.
+cglobal pred8x8_tm_vp8_8, 2,3,6
+ sub r0, r1
+ movdqa xmm4, [tm_shuf]
+ pxor xmm1, xmm1
+ movq xmm0, [r0]
+ punpcklbw xmm0, xmm1
+; xmm5 = top-left neighbour broadcast as words (loaded from [r0-4],
+; shuffled into place by tm_shuf)
+ movd xmm5, [r0-4]
+ pshufb xmm5, xmm4
+ mov r2d, 4
+.loop:
+ movd xmm2, [r0+r1*1-4]
+ movd xmm3, [r0+r1*2-4]
+ pshufb xmm2, xmm4
+ pshufb xmm3, xmm4
+ psubw xmm2, xmm5
+ psubw xmm3, xmm5
+ paddw xmm2, xmm0
+ paddw xmm3, xmm0
+ packuswb xmm2, xmm3
+ movq [r0+r1*1], xmm2
+ movhps [r0+r1*2], xmm2
+ lea r0, [r0+r1*2]
+ dec r2d
+ jg .loop
+ REP_RET
+
+; dest, left, right, src, tmp
+; output: %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2
+; Computed with byte averages to stay in 8 bits:
+;   avg(left,right) would lose the carry of the low bits, so the xor of
+;   left/right gives the dropped bit, pb_1 isolates it, and psubusb
+;   corrects the rounding before the final pavgb with src.
+%macro PRED4x4_LOWPASS 5
+ mova %5, %2
+ pavgb %2, %3
+ pxor %3, %5
+ mova %1, %4
+ pand %3, [pb_1]
+ psubusb %2, %3
+ pavgb %1, %2
+%endmacro
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_top_dc_8(uint8_t *src, int has_topleft, int has_topright,
+; int stride)
+;-----------------------------------------------------------------------------
+; Luma 8x8 top-DC prediction: low-pass filter the top row (with edge
+; patch-ups when top-left/top-right are unavailable), average it with
+; psadbw, round, and fill all 8 rows with the result.
+%macro PRED8x8L_TOP_DC 0
+cglobal pred8x8l_top_dc_8, 4,4
+ sub r0, r3
+ pxor mm7, mm7
+; mm2/mm3/mm1 = top row shifted left/centered/shifted right by one byte
+; (neighbours for the 3-tap filter)
+ movq mm0, [r0-8]
+ movq mm3, [r0]
+ movq mm1, [r0+8]
+ movq mm2, mm3
+ movq mm4, mm3
+ PALIGNR mm2, mm0, 7, mm0
+ PALIGNR mm1, mm4, 1, mm4
+ test r1d, r1d ; top_left
+ jz .fix_lt_2
+ test r2d, r2d ; top_right
+ jz .fix_tr_1
+ jmp .body
+; no top-left: replace the low byte of mm2 with the low byte of mm3
+.fix_lt_2:
+ movq mm5, mm3
+ pxor mm5, mm2
+ psllq mm5, 56
+ psrlq mm5, 56
+ pxor mm2, mm5
+ test r2d, r2d ; top_right
+ jnz .body
+; no top-right: replace the high byte of mm1 with the high byte of mm3
+.fix_tr_1:
+ movq mm5, mm3
+ pxor mm5, mm1
+ psrlq mm5, 56
+ psllq mm5, 56
+ pxor mm1, mm5
+.body:
+ PRED4x4_LOWPASS mm0, mm2, mm1, mm3, mm5
+; dc = (sum of filtered top row + 4) >> 3, broadcast to 8 bytes
+ psadbw mm7, mm0
+ paddw mm7, [pw_4]
+ psrlw mm7, 3
+ pshufw mm7, mm7, 0
+ packuswb mm7, mm7
+%rep 3
+ movq [r0+r3*1], mm7
+ movq [r0+r3*2], mm7
+ lea r0, [r0+r3*2]
+%endrep
+ movq [r0+r3*1], mm7
+ movq [r0+r3*2], mm7
+ RET
+%endmacro
+
+INIT_MMX mmxext
+PRED8x8L_TOP_DC
+INIT_MMX ssse3
+PRED8x8L_TOP_DC
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_dc_8(uint8_t *src, int has_topleft, int has_topright,
+; int stride)
+;-----------------------------------------------------------------------------
+
+; Luma 8x8 DC prediction: gather the 8 left-neighbour pixels into one
+; register via punpckh transposes, low-pass filter both the left column
+; (mm7) and the top row (mm6), and fill the block with the rounded mean
+; of both sums.
+%macro PRED8x8L_DC 0
+cglobal pred8x8l_dc_8, 4,5
+ sub r0, r3
+; transpose the column of src[-1] bytes (8 rows) into mm3
+ lea r4, [r0+r3*2]
+ movq mm0, [r0+r3*1-8]
+ punpckhbw mm0, [r0+r3*0-8]
+ movq mm1, [r4+r3*1-8]
+ punpckhbw mm1, [r0+r3*2-8]
+ mov r4, r0
+ punpckhwd mm1, mm0
+ lea r0, [r0+r3*4]
+ movq mm2, [r0+r3*1-8]
+ punpckhbw mm2, [r0+r3*0-8]
+ lea r0, [r0+r3*2]
+ movq mm3, [r0+r3*1-8]
+ punpckhbw mm3, [r0+r3*0-8]
+ punpckhwd mm3, mm2
+ punpckhdq mm3, mm1
+ lea r0, [r0+r3*2]
+ movq mm0, [r0+r3*0-8]
+ movq mm1, [r4]
+ mov r0, r4
+ movq mm4, mm3
+ movq mm2, mm3
+ PALIGNR mm4, mm0, 7, mm0
+ PALIGNR mm1, mm2, 1, mm2
+ test r1d, r1d
+ jnz .do_left
+; no top-left: patch byte 6 of mm1 from byte 7 of mm3
+.fix_lt_1:
+ movq mm5, mm3
+ pxor mm5, mm4
+ psrlq mm5, 56
+ psllq mm5, 48
+ pxor mm1, mm5
+ jmp .do_left
+; no top-left (top-row pass): replace low byte of mm2 with mm3's
+.fix_lt_2:
+ movq mm5, mm3
+ pxor mm5, mm2
+ psllq mm5, 56
+ psrlq mm5, 56
+ pxor mm2, mm5
+ test r2d, r2d
+ jnz .body
+; no top-right: replace high byte of mm1 with mm3's
+.fix_tr_1:
+ movq mm5, mm3
+ pxor mm5, mm1
+ psrlq mm5, 56
+ psllq mm5, 56
+ pxor mm1, mm5
+ jmp .body
+.do_left:
+; low-pass the transposed left column into mm7
+ movq mm0, mm4
+ PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5
+ movq mm4, mm0
+ movq mm7, mm2
+ PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5
+ psllq mm1, 56
+ PALIGNR mm7, mm1, 7, mm3
+; now load and filter the top row (same pattern as pred8x8l_top_dc)
+ movq mm0, [r0-8]
+ movq mm3, [r0]
+ movq mm1, [r0+8]
+ movq mm2, mm3
+ movq mm4, mm3
+ PALIGNR mm2, mm0, 7, mm0
+ PALIGNR mm1, mm4, 1, mm4
+ test r1d, r1d
+ jz .fix_lt_2
+ test r2d, r2d
+ jz .fix_tr_1
+.body:
+ lea r1, [r0+r3*2]
+ PRED4x4_LOWPASS mm6, mm2, mm1, mm3, mm5
+ pxor mm0, mm0
+ pxor mm1, mm1
+ lea r2, [r1+r3*2]
+; dc = (sum(left) + sum(top) + 8) >> 4, broadcast to 8 bytes
+ psadbw mm0, mm7
+ psadbw mm1, mm6
+ paddw mm0, [pw_8]
+ paddw mm0, mm1
+ lea r4, [r2+r3*2]
+ psrlw mm0, 4
+ pshufw mm0, mm0, 0
+ packuswb mm0, mm0
+ movq [r0+r3*1], mm0
+ movq [r0+r3*2], mm0
+ movq [r1+r3*1], mm0
+ movq [r1+r3*2], mm0
+ movq [r2+r3*1], mm0
+ movq [r2+r3*2], mm0
+ movq [r4+r3*1], mm0
+ movq [r4+r3*2], mm0
+ RET
+%endmacro
+
+INIT_MMX mmxext
+PRED8x8L_DC
+INIT_MMX ssse3
+PRED8x8L_DC
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_horizontal_8(uint8_t *src, int has_topleft,
+; int has_topright, int stride)
+;-----------------------------------------------------------------------------
+
+; Luma 8x8 horizontal prediction: transpose the left-neighbour column,
+; low-pass filter it, then broadcast each filtered byte across its row
+; via pshufw.
+%macro PRED8x8L_HORIZONTAL 0
+cglobal pred8x8l_horizontal_8, 4,4
+ sub r0, r3
+ lea r2, [r0+r3*2]
+ movq mm0, [r0+r3*1-8]
+ test r1d, r1d
+; branchless top-left handling: r1 = (has_topleft ? row above : row 0),
+; i.e. the row whose left byte substitutes for the missing corner
+ lea r1, [r0+r3]
+ cmovnz r1, r0
+ punpckhbw mm0, [r1+r3*0-8]
+; transpose the 8 src[-1] bytes into mm3 (punpckh chain)
+ movq mm1, [r2+r3*1-8]
+ punpckhbw mm1, [r0+r3*2-8]
+ mov r2, r0
+ punpckhwd mm1, mm0
+ lea r0, [r0+r3*4]
+ movq mm2, [r0+r3*1-8]
+ punpckhbw mm2, [r0+r3*0-8]
+ lea r0, [r0+r3*2]
+ movq mm3, [r0+r3*1-8]
+ punpckhbw mm3, [r0+r3*0-8]
+ punpckhwd mm3, mm2
+ punpckhdq mm3, mm1
+ lea r0, [r0+r3*2]
+ movq mm0, [r0+r3*0-8]
+ movq mm1, [r1+r3*0-8]
+ mov r0, r2
+ movq mm4, mm3
+ movq mm2, mm3
+ PALIGNR mm4, mm0, 7, mm0
+ PALIGNR mm1, mm2, 1, mm2
+; low-pass the column; mm7 ends up holding the 8 filtered left pixels
+ movq mm0, mm4
+ PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5
+ movq mm4, mm0
+ movq mm7, mm2
+ PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5
+ psllq mm1, 56
+ PALIGNR mm7, mm1, 7, mm3
+ movq mm3, mm7
+ lea r1, [r0+r3*2]
+ movq mm7, mm3
+; duplicate each byte, then pshufw-broadcast byte pairs to whole rows
+ punpckhbw mm3, mm3
+ punpcklbw mm7, mm7
+ pshufw mm0, mm3, 0xff
+ pshufw mm1, mm3, 0xaa
+ lea r2, [r1+r3*2]
+ pshufw mm2, mm3, 0x55
+ pshufw mm3, mm3, 0x00
+ pshufw mm4, mm7, 0xff
+ pshufw mm5, mm7, 0xaa
+ pshufw mm6, mm7, 0x55
+ pshufw mm7, mm7, 0x00
+ movq [r0+r3*1], mm0
+ movq [r0+r3*2], mm1
+ movq [r1+r3*1], mm2
+ movq [r1+r3*2], mm3
+ movq [r2+r3*1], mm4
+ movq [r2+r3*2], mm5
+ lea r0, [r2+r3*2]
+ movq [r0+r3*1], mm6
+ movq [r0+r3*2], mm7
+ RET
+%endmacro
+
+INIT_MMX mmxext
+PRED8x8L_HORIZONTAL
+INIT_MMX ssse3
+PRED8x8L_HORIZONTAL
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_vertical_8(uint8_t *src, int has_topleft, int has_topright,
+; int stride)
+;-----------------------------------------------------------------------------
+
+; Luma 8x8 vertical prediction: low-pass filter the top row (patching
+; the edge bytes when top-left/top-right are unavailable) and copy the
+; filtered row into all 8 rows.
+%macro PRED8x8L_VERTICAL 0
+cglobal pred8x8l_vertical_8, 4,4
+ sub r0, r3
+; mm2/mm3/mm1 = top row shifted left/centered/shifted right by one byte
+ movq mm0, [r0-8]
+ movq mm3, [r0]
+ movq mm1, [r0+8]
+ movq mm2, mm3
+ movq mm4, mm3
+ PALIGNR mm2, mm0, 7, mm0
+ PALIGNR mm1, mm4, 1, mm4
+ test r1d, r1d ; top_left
+ jz .fix_lt_2
+ test r2d, r2d ; top_right
+ jz .fix_tr_1
+ jmp .body
+; no top-left: replace the low byte of mm2 with the low byte of mm3
+.fix_lt_2:
+ movq mm5, mm3
+ pxor mm5, mm2
+ psllq mm5, 56
+ psrlq mm5, 56
+ pxor mm2, mm5
+ test r2d, r2d ; top_right
+ jnz .body
+; no top-right: replace the high byte of mm1 with the high byte of mm3
+.fix_tr_1:
+ movq mm5, mm3
+ pxor mm5, mm1
+ psrlq mm5, 56
+ psllq mm5, 56
+ pxor mm1, mm5
+.body:
+ PRED4x4_LOWPASS mm0, mm2, mm1, mm3, mm5
+%rep 3
+ movq [r0+r3*1], mm0
+ movq [r0+r3*2], mm0
+ lea r0, [r0+r3*2]
+%endrep
+ movq [r0+r3*1], mm0
+ movq [r0+r3*2], mm0
+ RET
+%endmacro
+
+INIT_MMX mmxext
+PRED8x8L_VERTICAL
+INIT_MMX ssse3
+PRED8x8L_VERTICAL
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_down_left_8(uint8_t *src, int has_topleft,
+; int has_topright, int stride)
+;-----------------------------------------------------------------------------
+
+INIT_MMX mmxext
+; Luma 8x8 down-left prediction, MMX-only variant: filter top (mm7) and
+; top-right (mm1) rows, then derive each diagonal output row by shifting
+; bytes across the mm0/mm1 pair and storing bottom-up.
+cglobal pred8x8l_down_left_8, 4,5
+ sub r0, r3
+ movq mm0, [r0-8]
+ movq mm3, [r0]
+ movq mm1, [r0+8]
+ movq mm2, mm3
+ movq mm4, mm3
+ PALIGNR mm2, mm0, 7, mm0
+ PALIGNR mm1, mm4, 1, mm4
+ test r1d, r1d
+ jz .fix_lt_2
+ test r2d, r2d
+ jz .fix_tr_1
+ jmp .do_top
+; no top-left: replace low byte of mm2 with mm3's
+.fix_lt_2:
+ movq mm5, mm3
+ pxor mm5, mm2
+ psllq mm5, 56
+ psrlq mm5, 56
+ pxor mm2, mm5
+ test r2d, r2d
+ jnz .do_top
+; no top-right: replace high byte of mm1 with mm3's
+.fix_tr_1:
+ movq mm5, mm3
+ pxor mm5, mm1
+ psrlq mm5, 56
+ psllq mm5, 56
+ pxor mm1, mm5
+ jmp .do_top
+; no top-right row at all: extend the last top pixel across mm1
+.fix_tr_2:
+ punpckhbw mm3, mm3
+ pshufw mm1, mm3, 0xFF
+ jmp .do_topright
+.do_top:
+ PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5
+ movq mm7, mm4
+ test r2d, r2d
+ jz .fix_tr_2
+; filter the top-right 8 pixels into mm1
+ movq mm0, [r0+8]
+ movq mm5, mm0
+ movq mm2, mm0
+ movq mm4, mm0
+ psrlq mm5, 56
+ PALIGNR mm2, mm3, 7, mm3
+ PALIGNR mm5, mm4, 1, mm4
+ PRED4x4_LOWPASS mm1, mm2, mm5, mm0, mm4
+.do_topright:
+ lea r1, [r0+r3*2]
+ movq mm6, mm1
+ psrlq mm1, 56
+ movq mm4, mm1
+ lea r2, [r1+r3*2]
+ movq mm2, mm6
+ PALIGNR mm2, mm7, 1, mm0
+ movq mm3, mm6
+ PALIGNR mm3, mm7, 7, mm0
+ PALIGNR mm4, mm6, 1, mm0
+ movq mm5, mm7
+ movq mm1, mm7
+ movq mm7, mm6
+ lea r4, [r2+r3*2]
+ psllq mm1, 8
+; mm0:mm1 = second low-pass over the 16 filtered top/top-right pixels;
+; each subsequent row shifts one byte from mm0 into mm1 (stores run
+; bottom row first)
+ PRED4x4_LOWPASS mm0, mm1, mm2, mm5, mm6
+ PRED4x4_LOWPASS mm1, mm3, mm4, mm7, mm6
+ movq [r4+r3*2], mm1
+ movq mm2, mm0
+ psllq mm1, 8
+ psrlq mm2, 56
+ psllq mm0, 8
+ por mm1, mm2
+ movq [r4+r3*1], mm1
+ movq mm2, mm0
+ psllq mm1, 8
+ psrlq mm2, 56
+ psllq mm0, 8
+ por mm1, mm2
+ movq [r2+r3*2], mm1
+ movq mm2, mm0
+ psllq mm1, 8
+ psrlq mm2, 56
+ psllq mm0, 8
+ por mm1, mm2
+ movq [r2+r3*1], mm1
+ movq mm2, mm0
+ psllq mm1, 8
+ psrlq mm2, 56
+ psllq mm0, 8
+ por mm1, mm2
+ movq [r1+r3*2], mm1
+ movq mm2, mm0
+ psllq mm1, 8
+ psrlq mm2, 56
+ psllq mm0, 8
+ por mm1, mm2
+ movq [r1+r3*1], mm1
+ movq mm2, mm0
+ psllq mm1, 8
+ psrlq mm2, 56
+ psllq mm0, 8
+ por mm1, mm2
+ movq [r0+r3*2], mm1
+ psllq mm1, 8
+ psrlq mm0, 56
+ por mm1, mm0
+ movq [r0+r3*1], mm1
+ RET
+
+; Luma 8x8 down-left prediction (sse2/ssse3): the neighbour filtering is
+; done in MMX registers (note: instantiated with INIT_MMX), then the 16
+; filtered pixels are merged into one XMM register so each output row is
+; a single psrldq+movq.
+%macro PRED8x8L_DOWN_LEFT 0
+cglobal pred8x8l_down_left_8, 4,4
+ sub r0, r3
+ movq mm0, [r0-8]
+ movq mm3, [r0]
+ movq mm1, [r0+8]
+ movq mm2, mm3
+ movq mm4, mm3
+ PALIGNR mm2, mm0, 7, mm0
+ PALIGNR mm1, mm4, 1, mm4
+ test r1d, r1d ; top_left
+ jz .fix_lt_2
+ test r2d, r2d ; top_right
+ jz .fix_tr_1
+ jmp .do_top
+; no top-left: replace low byte of mm2 with mm3's
+.fix_lt_2:
+ movq mm5, mm3
+ pxor mm5, mm2
+ psllq mm5, 56
+ psrlq mm5, 56
+ pxor mm2, mm5
+ test r2d, r2d ; top_right
+ jnz .do_top
+; no top-right: replace high byte of mm1 with mm3's
+.fix_tr_1:
+ movq mm5, mm3
+ pxor mm5, mm1
+ psrlq mm5, 56
+ psllq mm5, 56
+ pxor mm1, mm5
+ jmp .do_top
+; no top-right row at all: extend the last top pixel across mm1
+.fix_tr_2:
+ punpckhbw mm3, mm3
+ pshufw mm1, mm3, 0xFF
+ jmp .do_topright
+.do_top:
+ PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5
+ movq2dq xmm3, mm4
+ test r2d, r2d ; top_right
+ jz .fix_tr_2
+ movq mm0, [r0+8]
+ movq mm5, mm0
+ movq mm2, mm0
+ movq mm4, mm0
+ psrlq mm5, 56
+ PALIGNR mm2, mm3, 7, mm3
+ PALIGNR mm5, mm4, 1, mm4
+ PRED4x4_LOWPASS mm1, mm2, mm5, mm0, mm4
+.do_topright:
+; assemble 16 filtered pixels in xmm3 (top in low half, top-right high),
+; build the left/right shifted neighbours, and low-pass once more
+ movq2dq xmm4, mm1
+ psrlq mm1, 56
+ movq2dq xmm5, mm1
+ lea r1, [r0+r3*2]
+ pslldq xmm4, 8
+ por xmm3, xmm4
+ movdqa xmm2, xmm3
+ psrldq xmm2, 1
+ pslldq xmm5, 15
+ por xmm2, xmm5
+ lea r2, [r1+r3*2]
+ movdqa xmm1, xmm3
+ pslldq xmm1, 1
+INIT_XMM cpuname
+ PRED4x4_LOWPASS xmm0, xmm1, xmm2, xmm3, xmm4
+; row k of the output is bytes k+1..k+8 of the filtered vector
+ psrldq xmm0, 1
+ movq [r0+r3*1], xmm0
+ psrldq xmm0, 1
+ movq [r0+r3*2], xmm0
+ psrldq xmm0, 1
+ lea r0, [r2+r3*2]
+ movq [r1+r3*1], xmm0
+ psrldq xmm0, 1
+ movq [r1+r3*2], xmm0
+ psrldq xmm0, 1
+ movq [r2+r3*1], xmm0
+ psrldq xmm0, 1
+ movq [r2+r3*2], xmm0
+ psrldq xmm0, 1
+ movq [r0+r3*1], xmm0
+ psrldq xmm0, 1
+ movq [r0+r3*2], xmm0
+ RET
+%endmacro
+
+INIT_MMX sse2
+PRED8x8L_DOWN_LEFT
+INIT_MMX ssse3
+PRED8x8L_DOWN_LEFT
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_down_right_8_mmxext(uint8_t *src, int has_topleft,
+; int has_topright, int stride)
+;-----------------------------------------------------------------------------
+
+INIT_MMX mmxext
+; Luma 8x8 down-right prediction, MMX-only variant: filter the left
+; column (transposed via punpckh) and the top row, then shift bytes from
+; the left half into the top half one row at a time, storing bottom-up.
+cglobal pred8x8l_down_right_8, 4,5
+ sub r0, r3
+; transpose the column of src[-1] bytes (8 rows) into mm3
+ lea r4, [r0+r3*2]
+ movq mm0, [r0+r3*1-8]
+ punpckhbw mm0, [r0+r3*0-8]
+ movq mm1, [r4+r3*1-8]
+ punpckhbw mm1, [r0+r3*2-8]
+ mov r4, r0
+ punpckhwd mm1, mm0
+ lea r0, [r0+r3*4]
+ movq mm2, [r0+r3*1-8]
+ punpckhbw mm2, [r0+r3*0-8]
+ lea r0, [r0+r3*2]
+ movq mm3, [r0+r3*1-8]
+ punpckhbw mm3, [r0+r3*0-8]
+ punpckhwd mm3, mm2
+ punpckhdq mm3, mm1
+ lea r0, [r0+r3*2]
+ movq mm0, [r0+r3*0-8]
+ movq mm1, [r4]
+ mov r0, r4
+ movq mm4, mm3
+ movq mm2, mm3
+ PALIGNR mm4, mm0, 7, mm0
+ PALIGNR mm1, mm2, 1, mm2
+ test r1d, r1d ; top_left
+ jz .fix_lt_1
+.do_left:
+; low-pass the left column: mm7 = filtered column, mm6 = intermediate
+ movq mm0, mm4
+ PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5
+ movq mm4, mm0
+ movq mm7, mm2
+ movq mm6, mm2
+ PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5
+ psllq mm1, 56
+ PALIGNR mm7, mm1, 7, mm3
+; load and filter the top row
+ movq mm0, [r0-8]
+ movq mm3, [r0]
+ movq mm1, [r0+8]
+ movq mm2, mm3
+ movq mm4, mm3
+ PALIGNR mm2, mm0, 7, mm0
+ PALIGNR mm1, mm4, 1, mm4
+ test r1d, r1d ; top_left
+ jz .fix_lt_2
+ test r2d, r2d ; top_right
+ jz .fix_tr_1
+.do_top:
+ PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5
+ movq mm5, mm4
+ jmp .body
+; no top-left: patch byte 6 of mm1 from byte 7 of mm3
+.fix_lt_1:
+ movq mm5, mm3
+ pxor mm5, mm4
+ psrlq mm5, 56
+ psllq mm5, 48
+ pxor mm1, mm5
+ jmp .do_left
+; no top-left (top-row pass): replace low byte of mm2 with mm3's
+.fix_lt_2:
+ movq mm5, mm3
+ pxor mm5, mm2
+ psllq mm5, 56
+ psrlq mm5, 56
+ pxor mm2, mm5
+ test r2d, r2d ; top_right
+ jnz .do_top
+; no top-right: replace high byte of mm1 with mm3's
+.fix_tr_1:
+ movq mm5, mm3
+ pxor mm5, mm1
+ psrlq mm5, 56
+ psllq mm5, 56
+ pxor mm1, mm5
+ jmp .do_top
+.body:
+ lea r1, [r0+r3*2]
+ movq mm1, mm7
+ movq mm7, mm5
+ movq mm5, mm6
+ movq mm2, mm7
+ lea r2, [r1+r3*2]
+ PALIGNR mm2, mm6, 1, mm0
+ movq mm3, mm7
+ PALIGNR mm3, mm6, 7, mm0
+ movq mm4, mm7
+ lea r4, [r2+r3*2]
+ psrlq mm4, 8
+; mm0:mm1 = second low-pass over left+top; each row shifts one byte
+; from mm1 (left part) into mm0 (stores run bottom row first)
+ PRED4x4_LOWPASS mm0, mm1, mm2, mm5, mm6
+ PRED4x4_LOWPASS mm1, mm3, mm4, mm7, mm6
+ movq [r4+r3*2], mm0
+ movq mm2, mm1
+ psrlq mm0, 8
+ psllq mm2, 56
+ psrlq mm1, 8
+ por mm0, mm2
+ movq [r4+r3*1], mm0
+ movq mm2, mm1
+ psrlq mm0, 8
+ psllq mm2, 56
+ psrlq mm1, 8
+ por mm0, mm2
+ movq [r2+r3*2], mm0
+ movq mm2, mm1
+ psrlq mm0, 8
+ psllq mm2, 56
+ psrlq mm1, 8
+ por mm0, mm2
+ movq [r2+r3*1], mm0
+ movq mm2, mm1
+ psrlq mm0, 8
+ psllq mm2, 56
+ psrlq mm1, 8
+ por mm0, mm2
+ movq [r1+r3*2], mm0
+ movq mm2, mm1
+ psrlq mm0, 8
+ psllq mm2, 56
+ psrlq mm1, 8
+ por mm0, mm2
+ movq [r1+r3*1], mm0
+ movq mm2, mm1
+ psrlq mm0, 8
+ psllq mm2, 56
+ psrlq mm1, 8
+ por mm0, mm2
+ movq [r0+r3*2], mm0
+ psrlq mm0, 8
+ psllq mm1, 56
+ por mm0, mm1
+ movq [r0+r3*1], mm0
+ RET
+
+; Luma 8x8 down-right prediction (sse2/ssse3): neighbour filtering in MMX
+; registers (instantiated with INIT_MMX), then the left column and top
+; row are merged into XMM registers and the 8 diagonal rows come from
+; successive psrldq shifts.
+%macro PRED8x8L_DOWN_RIGHT 0
+cglobal pred8x8l_down_right_8, 4,5
+ sub r0, r3
+; transpose the column of src[-1] bytes (8 rows) into mm3
+ lea r4, [r0+r3*2]
+ movq mm0, [r0+r3*1-8]
+ punpckhbw mm0, [r0+r3*0-8]
+ movq mm1, [r4+r3*1-8]
+ punpckhbw mm1, [r0+r3*2-8]
+ mov r4, r0
+ punpckhwd mm1, mm0
+ lea r0, [r0+r3*4]
+ movq mm2, [r0+r3*1-8]
+ punpckhbw mm2, [r0+r3*0-8]
+ lea r0, [r0+r3*2]
+ movq mm3, [r0+r3*1-8]
+ punpckhbw mm3, [r0+r3*0-8]
+ punpckhwd mm3, mm2
+ punpckhdq mm3, mm1
+ lea r0, [r0+r3*2]
+ movq mm0, [r0+r3*0-8]
+ movq mm1, [r4]
+ mov r0, r4
+ movq mm4, mm3
+ movq mm2, mm3
+ PALIGNR mm4, mm0, 7, mm0
+ PALIGNR mm1, mm2, 1, mm2
+ test r1d, r1d
+ jz .fix_lt_1
+ jmp .do_left
+; no top-left: patch byte 6 of mm1 from byte 7 of mm3
+.fix_lt_1:
+ movq mm5, mm3
+ pxor mm5, mm4
+ psrlq mm5, 56
+ psllq mm5, 48
+ pxor mm1, mm5
+ jmp .do_left
+; no top-left (top-row pass): replace low byte of mm2 with mm3's
+.fix_lt_2:
+ movq mm5, mm3
+ pxor mm5, mm2
+ psllq mm5, 56
+ psrlq mm5, 56
+ pxor mm2, mm5
+ test r2d, r2d
+ jnz .do_top
+; no top-right: replace high byte of mm1 with mm3's
+.fix_tr_1:
+ movq mm5, mm3
+ pxor mm5, mm1
+ psrlq mm5, 56
+ psllq mm5, 56
+ pxor mm1, mm5
+ jmp .do_top
+.do_left:
+; filter the left column; stash copies in xmm3/xmm1 for the merge below
+ movq mm0, mm4
+ PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5
+ movq mm4, mm0
+ movq mm7, mm2
+ movq2dq xmm3, mm2
+ PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5
+ psllq mm1, 56
+ PALIGNR mm7, mm1, 7, mm3
+ movq2dq xmm1, mm7
+; load and filter the top row
+ movq mm0, [r0-8]
+ movq mm3, [r0]
+ movq mm1, [r0+8]
+ movq mm2, mm3
+ movq mm4, mm3
+ PALIGNR mm2, mm0, 7, mm0
+ PALIGNR mm1, mm4, 1, mm4
+ test r1d, r1d
+ jz .fix_lt_2
+ test r2d, r2d
+ jz .fix_tr_1
+.do_top:
+ PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5
+ movq2dq xmm4, mm4
+ lea r1, [r0+r3*2]
+; xmm3 = left column (low) | top row (high); xmm1 = the same shifted by
+; one with the corner byte spliced in; low-pass gives the diagonal
+ movdqa xmm0, xmm3
+ pslldq xmm4, 8
+ por xmm3, xmm4
+ lea r2, [r1+r3*2]
+ pslldq xmm4, 1
+ por xmm1, xmm4
+ psrldq xmm0, 7
+ pslldq xmm0, 15
+ psrldq xmm0, 7
+ por xmm1, xmm0
+ lea r0, [r2+r3*2]
+ movdqa xmm2, xmm3
+ psrldq xmm2, 1
+INIT_XMM cpuname
+ PRED4x4_LOWPASS xmm0, xmm1, xmm2, xmm3, xmm4
+; two staggered copies, stored bottom-up two rows at a time
+ movdqa xmm1, xmm0
+ psrldq xmm1, 1
+ movq [r0+r3*2], xmm0
+ movq [r0+r3*1], xmm1
+ psrldq xmm0, 2
+ psrldq xmm1, 2
+ movq [r2+r3*2], xmm0
+ movq [r2+r3*1], xmm1
+ psrldq xmm0, 2
+ psrldq xmm1, 2
+ movq [r1+r3*2], xmm0
+ movq [r1+r3*1], xmm1
+ psrldq xmm0, 2
+ psrldq xmm1, 2
+ movq [r4+r3*2], xmm0
+ movq [r4+r3*1], xmm1
+ RET
+%endmacro
+
+INIT_MMX sse2
+PRED8x8L_DOWN_RIGHT
+INIT_MMX ssse3
+PRED8x8L_DOWN_RIGHT
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_vertical_right_8(uint8_t *src, int has_topleft,
+; int has_topright, int stride)
+;-----------------------------------------------------------------------------
+
+INIT_MMX mmxext
+; Luma 8x8 vertical-right prediction, MMX-only variant: even rows come
+; from pavgb of the top row, odd rows from the low-pass; subsequent rows
+; prepend filtered left-column bytes via psllq+PALIGNR.
+cglobal pred8x8l_vertical_right_8, 4,5
+ sub r0, r3
+; transpose the column of src[-1] bytes (8 rows) into mm3
+ lea r4, [r0+r3*2]
+ movq mm0, [r0+r3*1-8]
+ punpckhbw mm0, [r0+r3*0-8]
+ movq mm1, [r4+r3*1-8]
+ punpckhbw mm1, [r0+r3*2-8]
+ mov r4, r0
+ punpckhwd mm1, mm0
+ lea r0, [r0+r3*4]
+ movq mm2, [r0+r3*1-8]
+ punpckhbw mm2, [r0+r3*0-8]
+ lea r0, [r0+r3*2]
+ movq mm3, [r0+r3*1-8]
+ punpckhbw mm3, [r0+r3*0-8]
+ punpckhwd mm3, mm2
+ punpckhdq mm3, mm1
+ lea r0, [r0+r3*2]
+ movq mm0, [r0+r3*0-8]
+ movq mm1, [r4]
+ mov r0, r4
+ movq mm4, mm3
+ movq mm2, mm3
+ PALIGNR mm4, mm0, 7, mm0
+ PALIGNR mm1, mm2, 1, mm2
+ test r1d, r1d
+ jz .fix_lt_1
+ jmp .do_left
+; no top-left: patch byte 6 of mm1 from byte 7 of mm3
+.fix_lt_1:
+ movq mm5, mm3
+ pxor mm5, mm4
+ psrlq mm5, 56
+ psllq mm5, 48
+ pxor mm1, mm5
+ jmp .do_left
+; no top-left (top-row pass): replace low byte of mm2 with mm3's
+.fix_lt_2:
+ movq mm5, mm3
+ pxor mm5, mm2
+ psllq mm5, 56
+ psrlq mm5, 56
+ pxor mm2, mm5
+ test r2d, r2d
+ jnz .do_top
+; no top-right: replace high byte of mm1 with mm3's
+.fix_tr_1:
+ movq mm5, mm3
+ pxor mm5, mm1
+ psrlq mm5, 56
+ psllq mm5, 56
+ pxor mm1, mm5
+ jmp .do_top
+.do_left:
+; mm7 = low-pass filtered left column
+ movq mm0, mm4
+ PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5
+ movq mm7, mm2
+; load and filter the top row into mm6
+ movq mm0, [r0-8]
+ movq mm3, [r0]
+ movq mm1, [r0+8]
+ movq mm2, mm3
+ movq mm4, mm3
+ PALIGNR mm2, mm0, 7, mm0
+ PALIGNR mm1, mm4, 1, mm4
+ test r1d, r1d
+ jz .fix_lt_2
+ test r2d, r2d
+ jz .fix_tr_1
+.do_top:
+ PRED4x4_LOWPASS mm6, mm2, mm1, mm3, mm5
+ lea r1, [r0+r3*2]
+; row 0 = pavgb(top, top<<1); row 1 = low-pass of the same triple
+ movq mm2, mm6
+ movq mm3, mm6
+ PALIGNR mm3, mm7, 7, mm0
+ PALIGNR mm6, mm7, 6, mm1
+ movq mm4, mm3
+ pavgb mm3, mm2
+ lea r2, [r1+r3*2]
+ PRED4x4_LOWPASS mm0, mm6, mm2, mm4, mm5
+ movq [r0+r3*1], mm3
+ movq [r0+r3*2], mm0
+; mm0 = filtered left-column bytes that get shifted into later rows
+ movq mm5, mm0
+ movq mm6, mm3
+ movq mm1, mm7
+ movq mm2, mm1
+ psllq mm2, 8
+ movq mm3, mm1
+ psllq mm3, 16
+ lea r4, [r2+r3*2]
+ PRED4x4_LOWPASS mm0, mm1, mm3, mm2, mm4
+; remaining rows: prepend one left byte per row pair via PALIGNR
+ PALIGNR mm6, mm0, 7, mm2
+ movq [r1+r3*1], mm6
+ psllq mm0, 8
+ PALIGNR mm5, mm0, 7, mm1
+ movq [r1+r3*2], mm5
+ psllq mm0, 8
+ PALIGNR mm6, mm0, 7, mm2
+ movq [r2+r3*1], mm6
+ psllq mm0, 8
+ PALIGNR mm5, mm0, 7, mm1
+ movq [r2+r3*2], mm5
+ psllq mm0, 8
+ PALIGNR mm6, mm0, 7, mm2
+ movq [r4+r3*1], mm6
+ psllq mm0, 8
+ PALIGNR mm5, mm0, 7, mm1
+ movq [r4+r3*2], mm5
+ RET
+
+; Luma 8x8 vertical-right prediction (sse2/ssse3): neighbour filtering in
+; MMX registers, then left+top merged into xmm0 and both row families
+; (pavgb and low-pass) produced with XMM shifts.
+%macro PRED8x8L_VERTICAL_RIGHT 0
+cglobal pred8x8l_vertical_right_8, 4,5,7
+ ; manually spill XMM registers for Win64 because
+ ; the code here is initialized with INIT_MMX
+ WIN64_SPILL_XMM 7
+ sub r0, r3
+; transpose the column of src[-1] bytes (8 rows) into mm3
+ lea r4, [r0+r3*2]
+ movq mm0, [r0+r3*1-8]
+ punpckhbw mm0, [r0+r3*0-8]
+ movq mm1, [r4+r3*1-8]
+ punpckhbw mm1, [r0+r3*2-8]
+ mov r4, r0
+ punpckhwd mm1, mm0
+ lea r0, [r0+r3*4]
+ movq mm2, [r0+r3*1-8]
+ punpckhbw mm2, [r0+r3*0-8]
+ lea r0, [r0+r3*2]
+ movq mm3, [r0+r3*1-8]
+ punpckhbw mm3, [r0+r3*0-8]
+ punpckhwd mm3, mm2
+ punpckhdq mm3, mm1
+ lea r0, [r0+r3*2]
+ movq mm0, [r0+r3*0-8]
+ movq mm1, [r4]
+ mov r0, r4
+ movq mm4, mm3
+ movq mm2, mm3
+ PALIGNR mm4, mm0, 7, mm0
+ PALIGNR mm1, mm2, 1, mm2
+ test r1d, r1d
+ jnz .do_left
+; no top-left: patch byte 6 of mm1 from byte 7 of mm3
+.fix_lt_1:
+ movq mm5, mm3
+ pxor mm5, mm4
+ psrlq mm5, 56
+ psllq mm5, 48
+ pxor mm1, mm5
+ jmp .do_left
+; no top-left (top-row pass): replace low byte of mm2 with mm3's
+.fix_lt_2:
+ movq mm5, mm3
+ pxor mm5, mm2
+ psllq mm5, 56
+ psrlq mm5, 56
+ pxor mm2, mm5
+ test r2d, r2d
+ jnz .do_top
+; no top-right: replace high byte of mm1 with mm3's
+.fix_tr_1:
+ movq mm5, mm3
+ pxor mm5, mm1
+ psrlq mm5, 56
+ psllq mm5, 56
+ pxor mm1, mm5
+ jmp .do_top
+.do_left:
+; filter the left column; keep it in xmm0's low half
+ movq mm0, mm4
+ PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5
+ movq2dq xmm0, mm2
+; load and filter the top row
+ movq mm0, [r0-8]
+ movq mm3, [r0]
+ movq mm1, [r0+8]
+ movq mm2, mm3
+ movq mm4, mm3
+ PALIGNR mm2, mm0, 7, mm0
+ PALIGNR mm1, mm4, 1, mm4
+ test r1d, r1d
+ jz .fix_lt_2
+ test r2d, r2d
+ jz .fix_tr_1
+.do_top:
+ PRED4x4_LOWPASS mm6, mm2, mm1, mm3, mm5
+ lea r1, [r0+r3*2]
+; xmm0 = left column (low) | filtered top row (high)
+ movq2dq xmm4, mm6
+ pslldq xmm4, 8
+ por xmm0, xmm4
+ movdqa xmm6, [pw_ff00]
+ movdqa xmm1, xmm0
+ lea r2, [r1+r3*2]
+ movdqa xmm2, xmm0
+ movdqa xmm3, xmm0
+ pslldq xmm0, 1
+ pslldq xmm1, 2
+; xmm2 = pavgb rows, xmm4 = low-pass rows
+ pavgb xmm2, xmm0
+INIT_XMM cpuname
+ PRED4x4_LOWPASS xmm4, xmm3, xmm1, xmm0, xmm5
+; pw_ff00 mask interleaves the two byte streams used by the lower rows
+ pandn xmm6, xmm4
+ movdqa xmm5, xmm4
+ psrlw xmm4, 8
+ packuswb xmm6, xmm4
+ movhlps xmm4, xmm6
+ movhps [r0+r3*2], xmm5
+ movhps [r0+r3*1], xmm2
+ psrldq xmm5, 4
+ movss xmm5, xmm6
+ psrldq xmm2, 4
+ movss xmm2, xmm4
+ lea r0, [r2+r3*2]
+; remaining rows: shift one byte per row pair, stored bottom-up
+ psrldq xmm5, 1
+ psrldq xmm2, 1
+ movq [r0+r3*2], xmm5
+ movq [r0+r3*1], xmm2
+ psrldq xmm5, 1
+ psrldq xmm2, 1
+ movq [r2+r3*2], xmm5
+ movq [r2+r3*1], xmm2
+ psrldq xmm5, 1
+ psrldq xmm2, 1
+ movq [r1+r3*2], xmm5
+ movq [r1+r3*1], xmm2
+ RET
+%endmacro
+
+INIT_MMX sse2
+PRED8x8L_VERTICAL_RIGHT
+INIT_MMX ssse3
+PRED8x8L_VERTICAL_RIGHT
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_vertical_left_8(uint8_t *src, int has_topleft,
+; int has_topright, int stride)
+;-----------------------------------------------------------------------------
+
+; Luma 8x8 vertical-left prediction (sse2/ssse3): filter top and
+; top-right rows in MMX registers, merge into xmm4, then even rows come
+; from pavgb and odd rows from the low-pass, shifted one byte per pair.
+%macro PRED8x8L_VERTICAL_LEFT 0
+cglobal pred8x8l_vertical_left_8, 4,4
+ sub r0, r3
+ movq mm0, [r0-8]
+ movq mm3, [r0]
+ movq mm1, [r0+8]
+ movq mm2, mm3
+ movq mm4, mm3
+ PALIGNR mm2, mm0, 7, mm0
+ PALIGNR mm1, mm4, 1, mm4
+ test r1d, r1d
+ jz .fix_lt_2
+ test r2d, r2d
+ jz .fix_tr_1
+ jmp .do_top
+; no top-left: replace low byte of mm2 with mm3's
+.fix_lt_2:
+ movq mm5, mm3
+ pxor mm5, mm2
+ psllq mm5, 56
+ psrlq mm5, 56
+ pxor mm2, mm5
+ test r2d, r2d
+ jnz .do_top
+; no top-right: replace high byte of mm1 with mm3's
+.fix_tr_1:
+ movq mm5, mm3
+ pxor mm5, mm1
+ psrlq mm5, 56
+ psllq mm5, 56
+ pxor mm1, mm5
+ jmp .do_top
+; no top-right row at all: extend the last top pixel across mm1
+.fix_tr_2:
+ punpckhbw mm3, mm3
+ pshufw mm1, mm3, 0xFF
+ jmp .do_topright
+.do_top:
+ PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5
+ movq2dq xmm4, mm4
+ test r2d, r2d
+ jz .fix_tr_2
+; filter the top-right 8 pixels into mm1
+ movq mm0, [r0+8]
+ movq mm5, mm0
+ movq mm2, mm0
+ movq mm4, mm0
+ psrlq mm5, 56
+ PALIGNR mm2, mm3, 7, mm3
+ PALIGNR mm5, mm4, 1, mm4
+ PRED4x4_LOWPASS mm1, mm2, mm5, mm0, mm4
+.do_topright:
+; xmm4 = filtered top (low) | top-right (high);
+; xmm3 = pavgb rows, xmm0 = low-pass rows
+ movq2dq xmm3, mm1
+ lea r1, [r0+r3*2]
+ pslldq xmm3, 8
+ por xmm4, xmm3
+ movdqa xmm2, xmm4
+ movdqa xmm1, xmm4
+ movdqa xmm3, xmm4
+ psrldq xmm2, 1
+ pslldq xmm1, 1
+ pavgb xmm3, xmm2
+ lea r2, [r1+r3*2]
+INIT_XMM cpuname
+ PRED4x4_LOWPASS xmm0, xmm1, xmm2, xmm4, xmm5
+ psrldq xmm0, 1
+; alternate the two streams, shifting one byte every two rows
+ movq [r0+r3*1], xmm3
+ movq [r0+r3*2], xmm0
+ lea r0, [r2+r3*2]
+ psrldq xmm3, 1
+ psrldq xmm0, 1
+ movq [r1+r3*1], xmm3
+ movq [r1+r3*2], xmm0
+ psrldq xmm3, 1
+ psrldq xmm0, 1
+ movq [r2+r3*1], xmm3
+ movq [r2+r3*2], xmm0
+ psrldq xmm3, 1
+ psrldq xmm0, 1
+ movq [r0+r3*1], xmm3
+ movq [r0+r3*2], xmm0
+ RET
+%endmacro
+
+INIT_MMX sse2
+PRED8x8L_VERTICAL_LEFT
+INIT_MMX ssse3
+PRED8x8L_VERTICAL_LEFT
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_horizontal_up_8(uint8_t *src, int has_topleft,
+; int has_topright, int stride)
+;-----------------------------------------------------------------------------
+
+; Luma 8x8 horizontal-up prediction: filter the left column, reverse it,
+; build the pavgb/low-pass interpolants, interleave them into the p1..p8
+; sequence, and emit rows as shifted/extended views of that sequence.
+%macro PRED8x8L_HORIZONTAL_UP 0
+cglobal pred8x8l_horizontal_up_8, 4,4
+ sub r0, r3
+ lea r2, [r0+r3*2]
+ movq mm0, [r0+r3*1-8]
+ test r1d, r1d
+; branchless top-left handling: r1 = (has_topleft ? row above : row 0)
+ lea r1, [r0+r3]
+ cmovnz r1, r0
+ punpckhbw mm0, [r1+r3*0-8]
+; transpose the 8 src[-1] bytes into mm3
+ movq mm1, [r2+r3*1-8]
+ punpckhbw mm1, [r0+r3*2-8]
+ mov r2, r0
+ punpckhwd mm1, mm0
+ lea r0, [r0+r3*4]
+ movq mm2, [r0+r3*1-8]
+ punpckhbw mm2, [r0+r3*0-8]
+ lea r0, [r0+r3*2]
+ movq mm3, [r0+r3*1-8]
+ punpckhbw mm3, [r0+r3*0-8]
+ punpckhwd mm3, mm2
+ punpckhdq mm3, mm1
+ lea r0, [r0+r3*2]
+ movq mm0, [r0+r3*0-8]
+ movq mm1, [r1+r3*0-8]
+ mov r0, r2
+ movq mm4, mm3
+ movq mm2, mm3
+ PALIGNR mm4, mm0, 7, mm0
+ PALIGNR mm1, mm2, 1, mm2
+; low-pass the left column into mm7
+ movq mm0, mm4
+ PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5
+ movq mm4, mm0
+ movq mm7, mm2
+ PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5
+ psllq mm1, 56
+ PALIGNR mm7, mm1, 7, mm3
+ lea r1, [r0+r3*2]
+ pshufw mm0, mm7, 00011011b ; l6 l7 l4 l5 l2 l3 l0 l1
+ psllq mm7, 56 ; l7 .. .. .. .. .. .. ..
+; byte-swap each word to finish reversing the column
+ movq mm2, mm0
+ psllw mm0, 8
+ psrlw mm2, 8
+ por mm2, mm0 ; l7 l6 l5 l4 l3 l2 l1 l0
+ movq mm3, mm2
+ movq mm4, mm2
+ movq mm5, mm2
+ psrlq mm2, 8
+ psrlq mm3, 16
+ lea r2, [r1+r3*2]
+ por mm2, mm7 ; l7 l7 l6 l5 l4 l3 l2 l1
+ punpckhbw mm7, mm7
+ por mm3, mm7 ; l7 l7 l7 l6 l5 l4 l3 l2
+; mm4 = pairwise averages, mm1 = 3-tap interpolants; interleave them
+ pavgb mm4, mm2
+ PRED4x4_LOWPASS mm1, mm3, mm5, mm2, mm6
+ movq mm5, mm4
+ punpcklbw mm4, mm1 ; p4 p3 p2 p1
+ punpckhbw mm5, mm1 ; p8 p7 p6 p5
+; each row is the p-sequence advanced by 2 bytes, with the tail clamped
+; to the last pixel (pshufw replication)
+ movq mm6, mm5
+ movq mm7, mm5
+ movq mm0, mm5
+ PALIGNR mm5, mm4, 2, mm1
+ pshufw mm1, mm6, 11111001b
+ PALIGNR mm6, mm4, 4, mm2
+ pshufw mm2, mm7, 11111110b
+ PALIGNR mm7, mm4, 6, mm3
+ pshufw mm3, mm0, 11111111b
+ movq [r0+r3*1], mm4
+ movq [r0+r3*2], mm5
+ lea r0, [r2+r3*2]
+ movq [r1+r3*1], mm6
+ movq [r1+r3*2], mm7
+ movq [r2+r3*1], mm0
+ movq [r2+r3*2], mm1
+ movq [r0+r3*1], mm2
+ movq [r0+r3*2], mm3
+ RET
+%endmacro
+
+INIT_MMX mmxext
+PRED8x8L_HORIZONTAL_UP
+INIT_MMX ssse3
+PRED8x8L_HORIZONTAL_UP
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_horizontal_down_8(uint8_t *src, int has_topleft,
+; int has_topright, int stride)
+;-----------------------------------------------------------------------------
+
+INIT_MMX mmxext
+; Luma 8x8 horizontal-down prediction, MMX-only variant: filter the left
+; column and top row, build the average/low-pass pairs, interleave them
+; with punpck, and emit rows via PALIGNR shifts (bottom-up).
+cglobal pred8x8l_horizontal_down_8, 4,5
+ sub r0, r3
+; transpose the column of src[-1] bytes (8 rows) into mm3
+ lea r4, [r0+r3*2]
+ movq mm0, [r0+r3*1-8]
+ punpckhbw mm0, [r0+r3*0-8]
+ movq mm1, [r4+r3*1-8]
+ punpckhbw mm1, [r0+r3*2-8]
+ mov r4, r0
+ punpckhwd mm1, mm0
+ lea r0, [r0+r3*4]
+ movq mm2, [r0+r3*1-8]
+ punpckhbw mm2, [r0+r3*0-8]
+ lea r0, [r0+r3*2]
+ movq mm3, [r0+r3*1-8]
+ punpckhbw mm3, [r0+r3*0-8]
+ punpckhwd mm3, mm2
+ punpckhdq mm3, mm1
+ lea r0, [r0+r3*2]
+ movq mm0, [r0+r3*0-8]
+ movq mm1, [r4]
+ mov r0, r4
+ movq mm4, mm3
+ movq mm2, mm3
+ PALIGNR mm4, mm0, 7, mm0
+ PALIGNR mm1, mm2, 1, mm2
+ test r1d, r1d
+ jnz .do_left
+; no top-left: patch byte 6 of mm1 from byte 7 of mm3
+.fix_lt_1:
+ movq mm5, mm3
+ pxor mm5, mm4
+ psrlq mm5, 56
+ psllq mm5, 48
+ pxor mm1, mm5
+ jmp .do_left
+; no top-left (top-row pass): replace low byte of mm2 with mm3's
+.fix_lt_2:
+ movq mm5, mm3
+ pxor mm5, mm2
+ psllq mm5, 56
+ psrlq mm5, 56
+ pxor mm2, mm5
+ test r2d, r2d
+ jnz .do_top
+; no top-right: replace high byte of mm1 with mm3's
+.fix_tr_1:
+ movq mm5, mm3
+ pxor mm5, mm1
+ psrlq mm5, 56
+ psllq mm5, 56
+ pxor mm1, mm5
+ jmp .do_top
+.do_left:
+; filter the left column: mm7 = shifted filtered column, mm6 = raw
+ movq mm0, mm4
+ PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5
+ movq mm4, mm0
+ movq mm7, mm2
+ movq mm6, mm2
+ PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5
+ psllq mm1, 56
+ PALIGNR mm7, mm1, 7, mm3
+; load and filter the top row
+ movq mm0, [r0-8]
+ movq mm3, [r0]
+ movq mm1, [r0+8]
+ movq mm2, mm3
+ movq mm4, mm3
+ PALIGNR mm2, mm0, 7, mm0
+ PALIGNR mm1, mm4, 1, mm4
+ test r1d, r1d
+ jz .fix_lt_2
+ test r2d, r2d
+ jz .fix_tr_1
+.do_top:
+ PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5
+ movq mm5, mm4
+ lea r1, [r0+r3*2]
+ psllq mm7, 56
+; mm3/mm0 = average and low-pass pairs over the left column,
+; mm6 = low-pass over the top-edge bytes
+ movq mm2, mm5
+ movq mm3, mm6
+ movq mm4, mm2
+ PALIGNR mm2, mm6, 7, mm5
+ PALIGNR mm6, mm7, 7, mm0
+ lea r2, [r1+r3*2]
+ PALIGNR mm4, mm3, 1, mm7
+ movq mm5, mm3
+ pavgb mm3, mm6
+ PRED4x4_LOWPASS mm0, mm4, mm6, mm5, mm7
+ movq mm4, mm2
+ movq mm1, mm2
+ lea r4, [r2+r3*2]
+ psrlq mm4, 16
+ psrlq mm1, 8
+ PRED4x4_LOWPASS mm6, mm4, mm2, mm1, mm5
+; interleave avg/low-pass bytes -> p-sequence in mm3 (low) / mm7 (high)
+ movq mm7, mm3
+ punpcklbw mm3, mm0
+ punpckhbw mm7, mm0
+ movq mm1, mm7
+ movq mm0, mm7
+ movq mm4, mm7
+ movq [r4+r3*2], mm3
+; rows advance 2 bytes each, stored bottom-up
+ PALIGNR mm7, mm3, 2, mm5
+ movq [r4+r3*1], mm7
+ PALIGNR mm1, mm3, 4, mm5
+ movq [r2+r3*2], mm1
+ PALIGNR mm0, mm3, 6, mm3
+ movq [r2+r3*1], mm0
+ movq mm2, mm6
+ movq mm3, mm6
+ movq [r1+r3*2], mm4
+ PALIGNR mm6, mm4, 2, mm5
+ movq [r1+r3*1], mm6
+ PALIGNR mm2, mm4, 4, mm5
+ movq [r0+r3*2], mm2
+ PALIGNR mm3, mm4, 6, mm4
+ movq [r0+r3*1], mm3
+ RET
+
+; void ff_pred8x8l_horizontal_down_8_{sse2,ssse3}(uint8_t *src, int has_topleft,
+;                                                 int has_topright, int stride)
+; r0=src, r1d=has_topleft, r2d=has_topright, r3=stride.
+; Gathers the 8 left neighbours (mm3), the top row and the top-right row,
+; patches unavailable edge pixels in the .fix_* stubs, then filters and
+; interleaves to produce the horizontal-down prediction.
+%macro PRED8x8L_HORIZONTAL_DOWN 0
+cglobal pred8x8l_horizontal_down_8, 4,5
+    sub r0, r3
+    lea r4, [r0+r3*2]
+    movq mm0, [r0+r3*1-8]
+    punpckhbw mm0, [r0+r3*0-8]
+    movq mm1, [r4+r3*1-8]
+    punpckhbw mm1, [r0+r3*2-8]
+    mov r4, r0
+    punpckhwd mm1, mm0
+    lea r0, [r0+r3*4]
+    movq mm2, [r0+r3*1-8]
+    punpckhbw mm2, [r0+r3*0-8]
+    lea r0, [r0+r3*2]
+    movq mm3, [r0+r3*1-8]
+    punpckhbw mm3, [r0+r3*0-8]
+    punpckhwd mm3, mm2
+    punpckhdq mm3, mm1
+    lea r0, [r0+r3*2]
+    movq mm0, [r0+r3*0-8]
+    movq mm1, [r4]
+    mov r0, r4
+    movq mm4, mm3
+    movq mm2, mm3
+    PALIGNR mm4, mm0, 7, mm0
+    PALIGNR mm1, mm2, 1, mm2
+    test r1d, r1d
+    jnz .do_left
+.fix_lt_1:
+    ; no top-left: synthesize the corner byte before the left-edge filter
+    movq mm5, mm3
+    pxor mm5, mm4
+    psrlq mm5, 56
+    psllq mm5, 48
+    pxor mm1, mm5
+    jmp .do_left
+.fix_lt_2:
+    ; no top-left: patch the low byte of the shifted top row
+    movq mm5, mm3
+    pxor mm5, mm2
+    psllq mm5, 56
+    psrlq mm5, 56
+    pxor mm2, mm5
+    test r2d, r2d
+    jnz .do_top
+.fix_tr_1:
+    ; no top-right: replicate the last available top pixel
+    movq mm5, mm3
+    pxor mm5, mm1
+    psrlq mm5, 56
+    psllq mm5, 56
+    pxor mm1, mm5
+    jmp .do_top
+.fix_tr_2:
+    ; no top-right row at all: broadcast top[7] into mm1
+    punpckhbw mm3, mm3
+    pshufw mm1, mm3, 0xFF
+    jmp .do_topright
+.do_left:
+    movq mm0, mm4
+    PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5
+    ; stage filtered left edge into the high half of xmm0
+    movq2dq xmm0, mm2
+    pslldq xmm0, 8
+    movq mm4, mm0
+    PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5
+    movq2dq xmm2, mm1
+    pslldq xmm2, 15
+    psrldq xmm2, 8
+    por xmm0, xmm2
+    movq mm0, [r0-8]
+    movq mm3, [r0]
+    movq mm1, [r0+8]
+    movq mm2, mm3
+    movq mm4, mm3
+    PALIGNR mm2, mm0, 7, mm0
+    PALIGNR mm1, mm4, 1, mm4
+    test r1d, r1d
+    jz .fix_lt_2
+    test r2d, r2d
+    jz .fix_tr_1
+.do_top:
+    PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5
+    movq2dq xmm1, mm4
+    test r2d, r2d
+    jz .fix_tr_2
+    movq mm0, [r0+8]
+    movq mm5, mm0
+    movq mm2, mm0
+    movq mm4, mm0
+    psrlq mm5, 56
+    PALIGNR mm2, mm3, 7, mm3
+    PALIGNR mm5, mm4, 1, mm4
+    PRED4x4_LOWPASS mm1, mm2, mm5, mm0, mm4
+.do_topright:
+    movq2dq xmm5, mm1
+    pslldq xmm5, 8
+    por xmm1, xmm5
+; switch register naming to XMM for the 16-byte combine phase
+INIT_XMM cpuname
+    lea r2, [r4+r3*2]
+    movdqa xmm2, xmm1
+    movdqa xmm3, xmm1
+    PALIGNR xmm1, xmm0, 7, xmm4
+    PALIGNR xmm2, xmm0, 9, xmm5
+    lea r1, [r2+r3*2]
+    PALIGNR xmm3, xmm0, 8, xmm0
+    movdqa xmm4, xmm1
+    pavgb xmm4, xmm3
+    lea r0, [r1+r3*2]
+    PRED4x4_LOWPASS xmm0, xmm1, xmm2, xmm3, xmm5
+    punpcklbw xmm4, xmm0
+    movhlps xmm0, xmm4
+    ; write the 8 rows bottom-up, shifting the diagonal 2 bytes per row
+    movq [r0+r3*2], xmm4
+    movq [r2+r3*2], xmm0
+    psrldq xmm4, 2
+    psrldq xmm0, 2
+    movq [r0+r3*1], xmm4
+    movq [r2+r3*1], xmm0
+    psrldq xmm4, 2
+    psrldq xmm0, 2
+    movq [r1+r3*2], xmm4
+    movq [r4+r3*2], xmm0
+    psrldq xmm4, 2
+    psrldq xmm0, 2
+    movq [r1+r3*1], xmm4
+    movq [r4+r3*1], xmm0
+    RET
+%endmacro
+
+INIT_MMX sse2
+PRED8x8L_HORIZONTAL_DOWN
+INIT_MMX ssse3
+PRED8x8L_HORIZONTAL_DOWN
+
+;-------------------------------------------------------------------------------
+; void ff_pred4x4_dc_8_mmxext(uint8_t *src, const uint8_t *topright, int stride)
+;-------------------------------------------------------------------------------
+
+INIT_MMX mmxext
+; r0=src, r1=topright (unused beyond the signature), r2=stride.
+; DC prediction: sum of 4 top pixels (psadbw vs zero) + 4 left pixels,
+; rounded ((sum+4)>>3), then broadcast to all 16 pixels via 0x01010101.
+cglobal pred4x4_dc_8, 3,5
+    pxor mm7, mm7
+    mov r4, r0
+    sub r0, r2
+    movd mm0, [r0]
+    psadbw mm0, mm7        ; horizontal sum of the 4 top bytes
+    movzx r1d, byte [r0+r2*1-1]
+    movd r3d, mm0
+    add r3d, r1d
+    movzx r1d, byte [r0+r2*2-1]
+    lea r0, [r0+r2*2]
+    add r3d, r1d
+    movzx r1d, byte [r0+r2*1-1]
+    add r3d, r1d
+    movzx r1d, byte [r0+r2*2-1]
+    add r3d, r1d
+    add r3d, 4
+    shr r3d, 3
+    imul r3d, 0x01010101   ; replicate DC byte across the dword
+    mov [r4+r2*0], r3d
+    mov [r0+r2*0], r3d
+    mov [r0+r2*1], r3d
+    mov [r0+r2*2], r3d
+    RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred4x4_tm_vp8_8_mmxext(uint8_t *src, const uint8_t *topright,
+; int stride)
+;-----------------------------------------------------------------------------
+
+; TM (TrueMotion) prediction for VP8: pred[y][x] = clip(top[x] + left[y] - topleft).
+; r0=src, r2=stride; processes two rows per loop iteration.
+%macro PRED4x4_TM 0
+cglobal pred4x4_tm_vp8_8, 3,6
+    sub r0, r2
+    pxor mm7, mm7
+    movd mm0, [r0]
+    punpcklbw mm0, mm7      ; top row widened to words
+    movzx r4d, byte [r0-1]  ; top-left pixel
+    mov r5d, 2
+.loop:
+    movzx r1d, byte [r0+r2*1-1]
+    movzx r3d, byte [r0+r2*2-1]
+    sub r1d, r4d            ; left - topleft for each of the two rows
+    sub r3d, r4d
+    movd mm2, r1d
+    movd mm4, r3d
+%if cpuflag(mmxext)
+    pshufw mm2, mm2, 0
+    pshufw mm4, mm4, 0
+%else
+    ; plain-MMX broadcast of the low word
+    punpcklwd mm2, mm2
+    punpcklwd mm4, mm4
+    punpckldq mm2, mm2
+    punpckldq mm4, mm4
+%endif
+    paddw mm2, mm0
+    paddw mm4, mm0
+    packuswb mm2, mm2       ; saturating pack performs the clip to [0,255]
+    packuswb mm4, mm4
+    movd [r0+r2*1], mm2
+    movd [r0+r2*2], mm4
+    lea r0, [r0+r2*2]
+    dec r5d
+    jg .loop
+    REP_RET
+%endmacro
+
+INIT_MMX mmx
+PRED4x4_TM
+INIT_MMX mmxext
+PRED4x4_TM
+
+INIT_XMM ssse3
+; SSSE3 TM variant: pshufb against [tm_shuf] broadcasts each left pixel
+; (zero-extended to words) so all four rows are computed without a loop.
+; NOTE(review): mm* names under INIT_XMM map to xmm registers.
+cglobal pred4x4_tm_vp8_8, 3,3
+    sub r0, r2
+    movq mm6, [tm_shuf]
+    pxor mm1, mm1
+    movd mm0, [r0]
+    punpcklbw mm0, mm1      ; top row as words
+    movd mm7, [r0-4]
+    pshufb mm7, mm6         ; broadcast top-left pixel
+    lea r1, [r0+r2*2]
+    movd mm2, [r0+r2*1-4]
+    movd mm3, [r0+r2*2-4]
+    movd mm4, [r1+r2*1-4]
+    movd mm5, [r1+r2*2-4]
+    pshufb mm2, mm6
+    pshufb mm3, mm6
+    pshufb mm4, mm6
+    pshufb mm5, mm6
+    psubw mm0, mm7          ; top - topleft, shared by all rows
+    paddw mm2, mm0
+    paddw mm3, mm0
+    paddw mm4, mm0
+    paddw mm5, mm0
+    packuswb mm2, mm2       ; saturate = clip
+    packuswb mm3, mm3
+    packuswb mm4, mm4
+    packuswb mm5, mm5
+    movd [r0+r2*1], mm2
+    movd [r0+r2*2], mm3
+    movd [r1+r2*1], mm4
+    movd [r1+r2*2], mm5
+    RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred4x4_vertical_vp8_8_mmxext(uint8_t *src, const uint8_t *topright,
+; int stride)
+;-----------------------------------------------------------------------------
+
+INIT_MMX mmxext
+; VP8 vertical prediction: the top row is smoothed with the 121 lowpass
+; filter (using topleft and topright as neighbours) and copied to all 4 rows.
+cglobal pred4x4_vertical_vp8_8, 3,3
+    sub r0, r2
+    movd m1, [r0-1]
+    movd m0, [r0]
+    mova m2, m0 ;t0 t1 t2 t3
+    punpckldq m0, [r1] ;t0 t1 t2 t3 t4 t5 t6 t7
+    lea r1, [r0+r2*2]
+    psrlq m0, 8 ;t1 t2 t3 t4
+    PRED4x4_LOWPASS m3, m1, m0, m2, m4
+    movd [r0+r2*1], m3
+    movd [r0+r2*2], m3
+    movd [r1+r2*1], m3
+    movd [r1+r2*2], m3
+    RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred4x4_down_left_8_mmxext(uint8_t *src, const uint8_t *topright,
+; int stride)
+;-----------------------------------------------------------------------------
+INIT_MMX mmxext
+; Down-left prediction: lowpass-filter the 8 top/topright pixels, then each
+; row is the filtered diagonal shifted right by one byte per row.
+cglobal pred4x4_down_left_8, 3,3
+    sub r0, r2
+    movq m1, [r0]
+    punpckldq m1, [r1]     ; top 4 + topright 4 pixels
+    movq m2, m1
+    movq m3, m1
+    psllq m1, 8
+    ; build the right neighbour with the last pixel duplicated
+    pxor m2, m1
+    psrlq m2, 8
+    pxor m2, m3
+    PRED4x4_LOWPASS m0, m1, m2, m3, m4
+    lea r1, [r0+r2*2]
+    psrlq m0, 8
+    movd [r0+r2*1], m0
+    psrlq m0, 8
+    movd [r0+r2*2], m0
+    psrlq m0, 8
+    movd [r1+r2*1], m0
+    psrlq m0, 8
+    movd [r1+r2*2], m0
+    RET
+
+;------------------------------------------------------------------------------
+; void ff_pred4x4_vertical_left_8_mmxext(uint8_t *src, const uint8_t *topright,
+; int stride)
+;------------------------------------------------------------------------------
+
+INIT_MMX mmxext
+; Vertical-left prediction: even rows use the 2-tap average (pavgb),
+; odd rows the 3-tap lowpass of the top/topright pixels.
+cglobal pred4x4_vertical_left_8, 3,3
+    sub r0, r2
+    movq m1, [r0]
+    punpckldq m1, [r1]
+    movq m3, m1
+    movq m2, m1
+    psrlq m3, 8
+    psrlq m2, 16
+    movq m4, m3
+    pavgb m4, m1           ; rows 0/2
+    PRED4x4_LOWPASS m0, m1, m2, m3, m5  ; rows 1/3
+    lea r1, [r0+r2*2]
+    movh [r0+r2*1], m4
+    movh [r0+r2*2], m0
+    psrlq m4, 8
+    psrlq m0, 8
+    movh [r1+r2*1], m4
+    movh [r1+r2*2], m0
+    RET
+
+;------------------------------------------------------------------------------
+; void ff_pred4x4_horizontal_up_8_mmxext(uint8_t *src, const uint8_t *topright,
+; int stride)
+;------------------------------------------------------------------------------
+
+INIT_MMX mmxext
+; Horizontal-up prediction: built from the 4 left pixels; pixels beyond the
+; last left sample are padded by replicating it (punpckhbw/pshufw 0xFF).
+cglobal pred4x4_horizontal_up_8, 3,3
+    sub r0, r2
+    lea r1, [r0+r2*2]
+    movd m0, [r0+r2*1-4]
+    punpcklbw m0, [r0+r2*2-4]
+    movd m1, [r1+r2*1-4]
+    punpcklbw m1, [r1+r2*2-4]
+    punpckhwd m0, m1       ; l0 l1 l2 l3 in the high half
+    movq m1, m0
+    punpckhbw m1, m1
+    pshufw m1, m1, 0xFF    ; broadcast l3 as the padding value
+    punpckhdq m0, m1
+    movq m2, m0
+    movq m3, m0
+    movq m7, m0
+    psrlq m2, 16
+    psrlq m3, 8
+    pavgb m7, m3           ; 2-tap averages
+    PRED4x4_LOWPASS m4, m0, m2, m3, m5  ; 3-tap lowpass
+    punpcklbw m7, m4       ; interleave avg/lowpass pairs per row
+    movd [r0+r2*1], m7
+    psrlq m7, 16
+    movd [r0+r2*2], m7
+    psrlq m7, 16
+    movd [r1+r2*1], m7
+    movd [r1+r2*2], m1     ; last row is all l3
+    RET
+
+;------------------------------------------------------------------------------
+; void ff_pred4x4_horizontal_down_8_mmxext(uint8_t *src,
+; const uint8_t *topright, int stride)
+;------------------------------------------------------------------------------
+
+INIT_MMX mmxext
+; Horizontal-down prediction: pack top pixels, top-left and left column into
+; one register, filter, then emit rows as 2-byte shifts of the diagonal.
+cglobal pred4x4_horizontal_down_8, 3,3
+    sub r0, r2
+    lea r1, [r0+r2*2]
+    movh m0, [r0-4] ; lt ..
+    punpckldq m0, [r0] ; t3 t2 t1 t0 lt .. .. ..
+    psllq m0, 8 ; t2 t1 t0 lt .. .. .. ..
+    movd m1, [r1+r2*2-4] ; l3
+    punpcklbw m1, [r1+r2*1-4] ; l2 l3
+    movd m2, [r0+r2*2-4] ; l1
+    punpcklbw m2, [r0+r2*1-4] ; l0 l1
+    punpckhwd m1, m2 ; l0 l1 l2 l3
+    punpckhdq m1, m0 ; t2 t1 t0 lt l0 l1 l2 l3
+    movq m0, m1
+    movq m2, m1
+    movq m5, m1
+    psrlq m0, 16 ; .. .. t2 t1 t0 lt l0 l1
+    psrlq m2, 8 ; .. t2 t1 t0 lt l0 l1 l2
+    pavgb m5, m2
+    PRED4x4_LOWPASS m3, m1, m0, m2, m4
+    punpcklbw m5, m3       ; interleave averages with lowpass values
+    psrlq m3, 32
+    PALIGNR m3, m5, 6, m4  ; compose the top row from both halves
+    movh [r1+r2*2], m5
+    psrlq m5, 16
+    movh [r1+r2*1], m5
+    psrlq m5, 16
+    movh [r0+r2*2], m5
+    movh [r0+r2*1], m3
+    RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred4x4_vertical_right_8_mmxext(uint8_t *src,
+; const uint8_t *topright, int stride)
+;-----------------------------------------------------------------------------
+
+INIT_MMX mmxext
+; Vertical-right prediction: even rows come from pavgb of top/topleft,
+; odd rows from the 3-tap lowpass, shifted in left pixels for lower rows.
+cglobal pred4x4_vertical_right_8, 3,3
+    sub r0, r2
+    lea r1, [r0+r2*2]
+    movh m0, [r0] ; ........t3t2t1t0
+    movq m5, m0
+    PALIGNR m0, [r0-8], 7, m1 ; ......t3t2t1t0lt
+    pavgb m5, m0
+    PALIGNR m0, [r0+r2*1-8], 7, m1 ; ....t3t2t1t0ltl0
+    movq m1, m0
+    PALIGNR m0, [r0+r2*2-8], 7, m2 ; ..t3t2t1t0ltl0l1
+    movq m2, m0
+    PALIGNR m0, [r1+r2*1-8], 7, m3 ; t3t2t1t0ltl0l1l2
+    PRED4x4_LOWPASS m3, m1, m0, m2, m4
+    movq m1, m3
+    psrlq m3, 16
+    psllq m1, 48           ; keep filtered left pixels for rows 2/3
+    movh [r0+r2*1], m5
+    movh [r0+r2*2], m3
+    PALIGNR m5, m1, 7, m2
+    psllq m1, 8
+    movh [r1+r2*1], m5
+    PALIGNR m3, m1, 7, m1
+    movh [r1+r2*2], m3
+    RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred4x4_down_right_8_mmxext(uint8_t *src, const uint8_t *topright,
+; int stride)
+;-----------------------------------------------------------------------------
+
+INIT_MMX mmxext
+; Down-right prediction: assemble left column + topleft + top row into one
+; 8-byte diagonal, lowpass it, then each row shifts right by one byte.
+cglobal pred4x4_down_right_8, 3,3
+    sub r0, r2
+    lea r1, [r0+r2*2]
+    movq m1, [r1-8]
+    movq m2, [r0+r2*1-8]
+    punpckhbw m2, [r0-8]
+    movh m3, [r0]
+    punpckhwd m1, m2
+    PALIGNR m3, m1, 5, m1  ; t3 t2 t1 t0 lt l0 l1 l2
+    movq m1, m3
+    PALIGNR m3, [r1+r2*1-8], 7, m4
+    movq m2, m3
+    PALIGNR m3, [r1+r2*2-8], 7, m4
+    PRED4x4_LOWPASS m0, m3, m1, m2, m4
+    movh [r1+r2*2], m0
+    psrlq m0, 8
+    movh [r1+r2*1], m0
+    psrlq m0, 8
+    movh [r0+r2*2], m0
+    psrlq m0, 8
+    movh [r0+r2*1], m0
+    RET
diff --git a/media/ffvpx/libavcodec/x86/h264_intrapred_10bit.asm b/media/ffvpx/libavcodec/x86/h264_intrapred_10bit.asm
new file mode 100644
index 000000000..9e40cfe24
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/h264_intrapred_10bit.asm
@@ -0,0 +1,1192 @@
+;*****************************************************************************
+;* MMX/SSE2/AVX-optimized 10-bit H.264 intra prediction code
+;*****************************************************************************
+;* Copyright (C) 2005-2011 x264 project
+;*
+;* Authors: Daniel Kang <daniel.d.kang@gmail.com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+cextern pw_1023
+%define pw_pixel_max pw_1023
+cextern pw_512
+cextern pw_16
+cextern pw_8
+cextern pw_4
+cextern pw_2
+cextern pw_1
+cextern pd_16
+
+pw_m32101234: dw -3, -2, -1, 0, 1, 2, 3, 4
+pw_m3: times 8 dw -3
+pd_17: times 4 dd 17
+
+SECTION .text
+
+; PRED4x4_LOWPASS dest, left, right, src  (10-bit word version)
+; output: %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2
+; implemented as pavgw(src, (left+right)>>1); clobbers %2.
+%macro PRED4x4_LOWPASS 4
+    paddw  %2, %3
+    psrlw  %2, 1
+    pavgw  %1, %4, %2
+%endmacro
+
+;-----------------------------------------------------------------------------
+; void ff_pred4x4_down_right(pixel *src, const pixel *topright, int stride)
+;-----------------------------------------------------------------------------
+; 10-bit down-right: same structure as the 8-bit version but on 16-bit
+; pixels (offsets/shifts doubled); r0=src, r1=topright ptr base, r2=stride.
+%macro PRED4x4_DR 0
+cglobal pred4x4_down_right_10, 3, 3
+    sub r0, r2
+    lea r1, [r0+r2*2]
+    movhps m1, [r1-8]
+    movhps m2, [r0+r2*1-8]
+    movhps m4, [r0-8]
+    punpckhwd m2, m4
+    movq m3, [r0]
+    punpckhdq m1, m2
+    PALIGNR m3, m1, 10, m1 ; t3 t2 t1 t0 lt l0 l1 l2
+    movhps m4, [r1+r2*1-8]
+    PALIGNR m0, m3, m4, 14, m4
+    movhps m4, [r1+r2*2-8]
+    PALIGNR m2, m0, m4, 14, m4
+    PRED4x4_LOWPASS m0, m2, m3, m0
+    movq [r1+r2*2], m0
+    psrldq m0, 2           ; shift diagonal one pixel per row
+    movq [r1+r2*1], m0
+    psrldq m0, 2
+    movq [r0+r2*2], m0
+    psrldq m0, 2
+    movq [r0+r2*1], m0
+    RET
+%endmacro
+
+INIT_XMM sse2
+PRED4x4_DR
+INIT_XMM ssse3
+PRED4x4_DR
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx
+PRED4x4_DR
+%endif
+
+;------------------------------------------------------------------------------
+; void ff_pred4x4_vertical_right(pixel *src, const pixel *topright, int stride)
+;------------------------------------------------------------------------------
+; 10-bit vertical-right: even rows from pavgw of top/topleft, odd rows from
+; the 3-tap lowpass; lower rows shift in filtered left pixels.
+%macro PRED4x4_VR 0
+cglobal pred4x4_vertical_right_10, 3, 3, 6
+    sub r0, r2
+    lea r1, [r0+r2*2]
+    movq m5, [r0] ; ........t3t2t1t0
+    movhps m1, [r0-8]
+    PALIGNR m0, m5, m1, 14, m1 ; ......t3t2t1t0lt
+    pavgw m5, m0
+    movhps m1, [r0+r2*1-8]
+    PALIGNR m0, m1, 14, m1 ; ....t3t2t1t0ltl0
+    movhps m2, [r0+r2*2-8]
+    PALIGNR m1, m0, m2, 14, m2 ; ..t3t2t1t0ltl0l1
+    movhps m3, [r1+r2*1-8]
+    PALIGNR m2, m1, m3, 14, m3 ; t3t2t1t0ltl0l1l2
+    PRED4x4_LOWPASS m1, m0, m2, m1
+    pslldq m0, m1, 12      ; filtered left pixels for rows 2/3
+    psrldq m1, 4
+    movq [r0+r2*1], m5
+    movq [r0+r2*2], m1
+    PALIGNR m5, m0, 14, m2
+    pslldq m0, 2
+    movq [r1+r2*1], m5
+    PALIGNR m1, m0, 14, m0
+    movq [r1+r2*2], m1
+    RET
+%endmacro
+
+INIT_XMM sse2
+PRED4x4_VR
+INIT_XMM ssse3
+PRED4x4_VR
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx
+PRED4x4_VR
+%endif
+
+;-------------------------------------------------------------------------------
+; void ff_pred4x4_horizontal_down(pixel *src, const pixel *topright, int stride)
+;-------------------------------------------------------------------------------
+; 10-bit horizontal-down: build the (top, topleft, left) diagonal in one xmm,
+; filter, and interleave 2-tap/3-tap results into the four output rows.
+%macro PRED4x4_HD 0
+cglobal pred4x4_horizontal_down_10, 3, 3
+    sub r0, r2
+    lea r1, [r0+r2*2]
+    movq m0, [r0-8] ; lt ..
+    movhps m0, [r0]
+    pslldq m0, 2 ; t2 t1 t0 lt .. .. .. ..
+    movq m1, [r1+r2*2-8] ; l3
+    movq m3, [r1+r2*1-8]
+    punpcklwd m1, m3 ; l2 l3
+    movq m2, [r0+r2*2-8] ; l1
+    movq m3, [r0+r2*1-8]
+    punpcklwd m2, m3 ; l0 l1
+    punpckhdq m1, m2 ; l0 l1 l2 l3
+    punpckhqdq m1, m0 ; t2 t1 t0 lt l0 l1 l2 l3
+    psrldq m0, m1, 4 ; .. .. t2 t1 t0 lt l0 l1
+    psrldq m3, m1, 2 ; .. t2 t1 t0 lt l0 l1 l2
+    pavgw m5, m1, m3
+    PRED4x4_LOWPASS m3, m1, m0, m3
+    punpcklwd m5, m3       ; pairs of (avg, lowpass) per output pixel
+    psrldq m3, 8
+    PALIGNR m3, m5, 12, m4 ; compose the top row
+    movq [r1+r2*2], m5
+    movhps [r0+r2*2], m5
+    psrldq m5, 4
+    movq [r1+r2*1], m5
+    movq [r0+r2*1], m3
+    RET
+%endmacro
+
+INIT_XMM sse2
+PRED4x4_HD
+INIT_XMM ssse3
+PRED4x4_HD
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx
+PRED4x4_HD
+%endif
+
+;-----------------------------------------------------------------------------
+; void ff_pred4x4_dc(pixel *src, const pixel *topright, int stride)
+;-----------------------------------------------------------------------------
+
+INIT_MMX mmxext
+; 10-bit DC prediction: sum the 4 left pixels (one word per row via psrlq 48)
+; and the 4 top pixels (HADDW), round ((sum+4)>>3) and splat to all rows.
+cglobal pred4x4_dc_10, 3, 3
+    sub r0, r2
+    lea r1, [r0+r2*2]
+    movq m2, [r0+r2*1-8]
+    paddw m2, [r0+r2*2-8]
+    paddw m2, [r1+r2*1-8]
+    paddw m2, [r1+r2*2-8]
+    psrlq m2, 48           ; keep only the summed left-column word
+    movq m0, [r0]
+    HADDW m0, m1
+    paddw m0, [pw_4]
+    paddw m0, m2
+    psrlw m0, 3
+    SPLATW m0, m0, 0
+    movq [r0+r2*1], m0
+    movq [r0+r2*2], m0
+    movq [r1+r2*1], m0
+    movq [r1+r2*2], m0
+    RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred4x4_down_left(pixel *src, const pixel *topright, int stride)
+;-----------------------------------------------------------------------------
+; 10-bit down-left: lowpass the top+topright pixels (last pixel duplicated
+; via pshufhw), then shift the diagonal one pixel per output row.
+%macro PRED4x4_DL 0
+cglobal pred4x4_down_left_10, 3, 3
+    sub r0, r2
+    movq m0, [r0]
+    movhps m0, [r1]
+    psrldq m2, m0, 2
+    pslldq m3, m0, 2
+    pshufhw m2, m2, 10100100b ; duplicate the final top-right pixel
+    PRED4x4_LOWPASS m0, m3, m2, m0
+    lea r1, [r0+r2*2]
+    movhps [r1+r2*2], m0
+    psrldq m0, 2
+    movq [r0+r2*1], m0
+    psrldq m0, 2
+    movq [r0+r2*2], m0
+    psrldq m0, 2
+    movq [r1+r2*1], m0
+    RET
+%endmacro
+
+INIT_XMM sse2
+PRED4x4_DL
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx
+PRED4x4_DL
+%endif
+
+;-----------------------------------------------------------------------------
+; void ff_pred4x4_vertical_left(pixel *src, const pixel *topright, int stride)
+;-----------------------------------------------------------------------------
+; 10-bit vertical-left: even rows from the 2-tap average, odd rows from the
+; 3-tap lowpass of the top/topright pixels.
+%macro PRED4x4_VL 0
+cglobal pred4x4_vertical_left_10, 3, 3
+    sub r0, r2
+    movu m1, [r0]
+    movhps m1, [r1]
+    psrldq m0, m1, 2
+    psrldq m2, m1, 4
+    pavgw m4, m0, m1       ; rows 0/2
+    PRED4x4_LOWPASS m0, m1, m2, m0  ; rows 1/3
+    lea r1, [r0+r2*2]
+    movq [r0+r2*1], m4
+    movq [r0+r2*2], m0
+    psrldq m4, 2
+    psrldq m0, 2
+    movq [r1+r2*1], m4
+    movq [r1+r2*2], m0
+    RET
+%endmacro
+
+INIT_XMM sse2
+PRED4x4_VL
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx
+PRED4x4_VL
+%endif
+
+;-----------------------------------------------------------------------------
+; void ff_pred4x4_horizontal_up(pixel *src, const pixel *topright, int stride)
+;-----------------------------------------------------------------------------
+INIT_MMX mmxext
+; 10-bit horizontal-up: gather the 4 left pixels, pad beyond l3 by
+; replication, and interleave 2-tap/3-tap filtered values per row.
+cglobal pred4x4_horizontal_up_10, 3, 3
+    sub r0, r2
+    lea r1, [r0+r2*2]
+    movq m0, [r0+r2*1-8]
+    punpckhwd m0, [r0+r2*2-8]
+    movq m1, [r1+r2*1-8]
+    punpckhwd m1, [r1+r2*2-8]
+    punpckhdq m0, m1       ; l0 l1 l2 l3
+    pshufw m1, m1, 0xFF    ; broadcast l3 for padding / last row
+    movq [r1+r2*2], m1
+    movd [r1+r2*1+4], m1
+    pshufw m2, m0, 11111001b
+    movq m1, m2
+    pavgw m2, m0
+
+    pshufw m5, m0, 11111110b
+    PRED4x4_LOWPASS m1, m0, m5, m1
+    movq m6, m2
+    punpcklwd m6, m1       ; (avg, lowpass) pairs for row 0
+    movq [r0+r2*1], m6
+    psrlq m2, 16
+    psrlq m1, 16
+    punpcklwd m2, m1
+    movq [r0+r2*2], m2
+    psrlq m2, 32
+    movd [r1+r2*1], m2
+    RET
+
+
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8_vertical(pixel *src, int stride)
+;-----------------------------------------------------------------------------
+INIT_XMM sse2
+; 10-bit 8x8 vertical prediction: copy the 16-byte top row into all 8 rows.
+cglobal pred8x8_vertical_10, 2, 2
+    sub r0, r1
+    mova m0, [r0]
+%rep 3
+    mova [r0+r1*1], m0
+    mova [r0+r1*2], m0
+    lea r0, [r0+r1*2]
+%endrep
+    mova [r0+r1*1], m0
+    mova [r0+r1*2], m0
+    RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8_horizontal(pixel *src, int stride)
+;-----------------------------------------------------------------------------
+INIT_XMM sse2
+; 10-bit 8x8 horizontal prediction: broadcast each row's left pixel
+; (pshuflw 0xff + punpcklqdq) across the row; 2 rows per iteration.
+cglobal pred8x8_horizontal_10, 2, 3
+    mov r2d, 4
+.loop:
+    movq m0, [r0+r1*0-8]
+    movq m1, [r0+r1*1-8]
+    pshuflw m0, m0, 0xff   ; replicate the last word (left pixel)
+    pshuflw m1, m1, 0xff
+    punpcklqdq m0, m0
+    punpcklqdq m1, m1
+    mova [r0+r1*0], m0
+    mova [r0+r1*1], m1
+    lea r0, [r0+r1*2]
+    dec r2d
+    jg .loop
+    REP_RET
+
+;-----------------------------------------------------------------------------
+; void ff_predict_8x8_dc(pixel *src, int stride)
+;-----------------------------------------------------------------------------
+; MOV8 dst, reg[, reg2] — store 8 pixels (16 bytes): two movq for MMX
+; builds, one movdqa for XMM builds (second register then unused).
+%macro MOV8 2-3
+; sort of a hack, but it works
+%if mmsize==8
+    movq [%1+0], %2
+    movq [%1+8], %3
+%else
+    movdqa [%1], %2
+%endif
+%endmacro
+
+; 10-bit 8x8 DC prediction (chroma-style, four 4x4 DC quadrants).
+; %1 is the word-shuffle instruction for the active ISA (pshufw / pshuflw).
+; s0/s1 = sums of top halves, s2/s3 = sums of left halves; each quadrant
+; gets (relevant sums + 2) >> 2 (rounding done via psrlw+pavgw with zero).
+%macro PRED8x8_DC 1
+cglobal pred8x8_dc_10, 2, 6
+    sub r0, r1
+    pxor m4, m4
+    movq m0, [r0+0]
+    movq m1, [r0+8]
+%if mmsize==16
+    punpcklwd m0, m1
+    movhlps m1, m0
+    paddw m0, m1
+%else
+    pshufw m2, m0, 00001110b
+    pshufw m3, m1, 00001110b
+    paddw m0, m2
+    paddw m1, m3
+    punpcklwd m0, m1
+%endif
+    %1 m2, m0, 00001110b
+    paddw m0, m2           ; m0 low words now hold s0, s1
+
+    lea r5, [r1*3]
+    lea r4, [r0+r1*4]
+    movzx r2d, word [r0+r1*1-2]
+    movzx r3d, word [r0+r1*2-2]
+    add r2d, r3d
+    movzx r3d, word [r0+r5*1-2]
+    add r2d, r3d
+    movzx r3d, word [r4-2]
+    add r2d, r3d
+    movd m2, r2d ; s2
+
+    movzx r2d, word [r4+r1*1-2]
+    movzx r3d, word [r4+r1*2-2]
+    add r2d, r3d
+    movzx r3d, word [r4+r5*1-2]
+    add r2d, r3d
+    movzx r3d, word [r4+r1*4-2]
+    add r2d, r3d
+    movd m3, r2d ; s3
+
+    punpcklwd m2, m3
+    punpckldq m0, m2 ; s0, s1, s2, s3
+    %1 m3, m0, 11110110b ; s2, s1, s3, s3
+    %1 m0, m0, 01110100b ; s0, s1, s3, s1
+    paddw m0, m3
+    psrlw m0, 2
+    pavgw m0, m4 ; s0+s2, s1, s3, s1+s3
+%if mmsize==16
+    punpcklwd m0, m0
+    pshufd m3, m0, 11111010b
+    punpckldq m0, m0
+    SWAP 0,1
+%else
+    ; broadcast the four quadrant DCs into m1..m4
+    pshufw m1, m0, 0x00
+    pshufw m2, m0, 0x55
+    pshufw m3, m0, 0xaa
+    pshufw m4, m0, 0xff
+%endif
+    MOV8 r0+r1*1, m1, m2
+    MOV8 r0+r1*2, m1, m2
+    MOV8 r0+r5*1, m1, m2
+    MOV8 r0+r1*4, m1, m2
+    MOV8 r4+r1*1, m3, m4
+    MOV8 r4+r1*2, m3, m4
+    MOV8 r4+r5*1, m3, m4
+    MOV8 r4+r1*4, m3, m4
+    RET
+%endmacro
+
+INIT_MMX mmxext
+PRED8x8_DC pshufw
+INIT_XMM sse2
+PRED8x8_DC pshuflw
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8_top_dc(pixel *src, int stride)
+;-----------------------------------------------------------------------------
+INIT_XMM sse2
+; 10-bit 8x8 top-DC: per-4-pixel-half sums via pshuflw/pshufhw swaps,
+; rounded ((sum+2)>>2), result already per-quadrant in each xmm half.
+cglobal pred8x8_top_dc_10, 2, 4
+    sub r0, r1
+    mova m0, [r0]
+    pshuflw m1, m0, 0x4e   ; swap word pairs within each half
+    pshufhw m1, m1, 0x4e
+    paddw m0, m1
+    pshuflw m1, m0, 0xb1   ; swap adjacent words
+    pshufhw m1, m1, 0xb1
+    paddw m0, m1
+    lea r2, [r1*3]
+    lea r3, [r0+r1*4]
+    paddw m0, [pw_2]
+    psrlw m0, 2
+    mova [r0+r1*1], m0
+    mova [r0+r1*2], m0
+    mova [r0+r2*1], m0
+    mova [r0+r1*4], m0
+    mova [r3+r1*1], m0
+    mova [r3+r1*2], m0
+    mova [r3+r2*1], m0
+    mova [r3+r1*4], m0
+    RET
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8_plane(pixel *src, int stride)
+;-----------------------------------------------------------------------------
+INIT_XMM sse2
+; 10-bit 8x8 plane prediction: compute H (top-row gradient via pmaddwd with
+; pw_m32101234) and V (left-column gradient, scalar), scale by 17/32, then
+; emit 8 rows of a + b*(x-3) + c*(y-3), clipped to [0, pw_pixel_max].
+cglobal pred8x8_plane_10, 2, 7, 7
+    sub r0, r1
+    lea r2, [r1*3]
+    lea r3, [r0+r1*4]
+    mova m2, [r0]
+    pmaddwd m2, [pw_m32101234]
+    HADDD m2, m1
+    movd m0, [r0-4]
+    psrld m0, 14           ; extract src[-stride-1] (word 1 of the dword)
+    psubw m2, m0 ; H
+    movd m0, [r3+r1*4-4]
+    movd m1, [r0+12]
+    paddw m0, m1
+    psllw m0, 4 ; 16*(src[7*stride-1] + src[-stride+7])
+    movzx r4d, word [r3+r1*1-2] ; src[4*stride-1]
+    movzx r5d, word [r0+r2*1-2] ; src[2*stride-1]
+    sub r4d, r5d
+    movzx r6d, word [r3+r1*2-2] ; src[5*stride-1]
+    movzx r5d, word [r0+r1*2-2] ; src[1*stride-1]
+    sub r6d, r5d
+    lea r4d, [r4+r6*2]
+    movzx r5d, word [r3+r2*1-2] ; src[6*stride-1]
+    movzx r6d, word [r0+r1*1-2] ; src[0*stride-1]
+    sub r5d, r6d
+    lea r5d, [r5*3]
+    add r4d, r5d
+    movzx r6d, word [r3+r1*4-2] ; src[7*stride-1]
+    movzx r5d, word [r0+r1*0-2] ; src[ -stride-1]
+    sub r6d, r5d
+    lea r4d, [r4+r6*4]
+    movd m3, r4d ; V
+    punpckldq m2, m3
+    pmaddwd m2, [pd_17]
+    paddd m2, [pd_16]
+    psrad m2, 5 ; b, c
+
+    mova m3, [pw_pixel_max]
+    pxor m1, m1
+    SPLATW m0, m0, 1
+    SPLATW m4, m2, 2
+    SPLATW m2, m2, 0
+    pmullw m2, [pw_m32101234] ; b
+    pmullw m5, m4, [pw_m3] ; c
+    paddw m5, [pw_16]
+    mov r2d, 8
+    add r0, r1
+.loop:
+    paddsw m6, m2, m5      ; a-ish base + b*x term; c*y accumulates in m5
+    paddsw m6, m0
+    psraw m6, 5
+    CLIPW m6, m1, m3
+    mova [r0], m6
+    paddw m5, m4
+    add r0, r1
+    dec r2d
+    jg .loop
+    REP_RET
+
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_128_dc(pixel *src, int has_topleft, int has_topright,
+; int stride)
+;-----------------------------------------------------------------------------
+; 10-bit 8x8l 128-DC: fill the block with the mid-grey value 512
+; (1<<(BIT_DEPTH-1)); has_topleft/has_topright are ignored.
+%macro PRED8x8L_128_DC 0
+cglobal pred8x8l_128_dc_10, 4, 4
+    mova m0, [pw_512] ; (1<<(BIT_DEPTH-1))
+    lea r1, [r3*3]
+    lea r2, [r0+r3*4]
+    MOV8 r0+r3*0, m0, m0
+    MOV8 r0+r3*1, m0, m0
+    MOV8 r0+r3*2, m0, m0
+    MOV8 r0+r1*1, m0, m0
+    MOV8 r2+r3*0, m0, m0
+    MOV8 r2+r3*1, m0, m0
+    MOV8 r2+r3*2, m0, m0
+    MOV8 r2+r1*1, m0, m0
+    RET
+%endmacro
+
+INIT_MMX mmxext
+PRED8x8L_128_DC
+INIT_XMM sse2
+PRED8x8L_128_DC
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_top_dc(pixel *src, int has_topleft, int has_topright,
+; int stride)
+;-----------------------------------------------------------------------------
+; 10-bit 8x8l top-DC: lowpass-filter the top row, using shr-derived offsets
+; so the edge pixels duplicate when topleft/topright are unavailable
+; (r1d>>14 yields 0/1 because the flag arrives as 0 or -1/0x4000-style mask
+;  from the caller — NOTE(review): relies on caller's flag encoding).
+%macro PRED8x8L_TOP_DC 0
+cglobal pred8x8l_top_dc_10, 4, 4, 6
+    sub r0, r3
+    mova m0, [r0]
+    shr r1d, 14
+    shr r2d, 13
+    neg r1
+    pslldq m1, m0, 2
+    psrldq m2, m0, 2
+    pinsrw m1, [r0+r1], 0    ; left neighbour or duplicated first pixel
+    pinsrw m2, [r0+r2+14], 7 ; right neighbour or duplicated last pixel
+    lea r1, [r3*3]
+    lea r2, [r0+r3*4]
+    PRED4x4_LOWPASS m0, m2, m1, m0
+    HADDW m0, m1
+    paddw m0, [pw_4]
+    psrlw m0, 3
+    SPLATW m0, m0, 0
+    mova [r0+r3*1], m0
+    mova [r0+r3*2], m0
+    mova [r0+r1*1], m0
+    mova [r0+r3*4], m0
+    mova [r2+r3*1], m0
+    mova [r2+r3*2], m0
+    mova [r2+r1*1], m0
+    mova [r2+r3*4], m0
+    RET
+%endmacro
+
+INIT_XMM sse2
+PRED8x8L_TOP_DC
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx
+PRED8x8L_TOP_DC
+%endif
+
+;-------------------------------------------------------------------------------
+; void ff_pred8x8l_dc(pixel *src, int has_topleft, int has_topright, int stride)
+;-------------------------------------------------------------------------------
+;TODO: see if scalar is faster
+; 10-bit 8x8l full DC: filter both the left column (gathered into m3) and
+; the top row, sum all 16 filtered pixels, round ((sum+8)>>4) and splat.
+%macro PRED8x8L_DC 0
+cglobal pred8x8l_dc_10, 4, 6, 6
+    sub r0, r3
+    lea r4, [r0+r3*4]
+    lea r5, [r3*3]
+    mova m0, [r0+r3*2-16]
+    punpckhwd m0, [r0+r3*1-16]
+    mova m1, [r4+r3*0-16]
+    punpckhwd m1, [r0+r5*1-16]
+    punpckhdq m1, m0
+    mova m2, [r4+r3*2-16]
+    punpckhwd m2, [r4+r3*1-16]
+    mova m3, [r4+r3*4-16]
+    punpckhwd m3, [r4+r5*1-16]
+    punpckhdq m3, m2
+    punpckhqdq m3, m1      ; the 8 left pixels as words
+    mova m0, [r0]
+    shr r1d, 14
+    shr r2d, 13
+    neg r1
+    pslldq m1, m0, 2
+    psrldq m2, m0, 2
+    pinsrw m1, [r0+r1], 0
+    pinsrw m2, [r0+r2+14], 7
+    not r1
+    and r1, r3             ; offset selecting topleft row only if available
+    pslldq m4, m3, 2
+    psrldq m5, m3, 2
+    pshuflw m4, m4, 11100101b
+    pinsrw m5, [r0+r1-2], 7
+    PRED4x4_LOWPASS m3, m4, m5, m3  ; filtered left column
+    PRED4x4_LOWPASS m0, m2, m1, m0  ; filtered top row
+    paddw m0, m3
+    HADDW m0, m1
+    paddw m0, [pw_8]
+    psrlw m0, 4
+    SPLATW m0, m0
+    mova [r0+r3*1], m0
+    mova [r0+r3*2], m0
+    mova [r0+r5*1], m0
+    mova [r0+r3*4], m0
+    mova [r4+r3*1], m0
+    mova [r4+r3*2], m0
+    mova [r4+r5*1], m0
+    mova [r4+r3*4], m0
+    RET
+%endmacro
+
+INIT_XMM sse2
+PRED8x8L_DC
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx
+PRED8x8L_DC
+%endif
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_vertical(pixel *src, int has_topleft, int has_topright,
+; int stride)
+;-----------------------------------------------------------------------------
+; 10-bit 8x8l vertical: lowpass-filter the top row (duplicating edge pixels
+; when topleft/topright are absent) and copy it into all 8 rows.
+%macro PRED8x8L_VERTICAL 0
+cglobal pred8x8l_vertical_10, 4, 4, 6
+    sub r0, r3
+    mova m0, [r0]
+    shr r1d, 14
+    shr r2d, 13
+    neg r1
+    pslldq m1, m0, 2
+    psrldq m2, m0, 2
+    pinsrw m1, [r0+r1], 0    ; topleft or duplicated first pixel
+    pinsrw m2, [r0+r2+14], 7 ; topright or duplicated last pixel
+    lea r1, [r3*3]
+    lea r2, [r0+r3*4]
+    PRED4x4_LOWPASS m0, m2, m1, m0
+    mova [r0+r3*1], m0
+    mova [r0+r3*2], m0
+    mova [r0+r1*1], m0
+    mova [r0+r3*4], m0
+    mova [r2+r3*1], m0
+    mova [r2+r3*2], m0
+    mova [r2+r1*1], m0
+    mova [r2+r3*4], m0
+    RET
+%endmacro
+
+INIT_XMM sse2
+PRED8x8L_VERTICAL
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx
+PRED8x8L_VERTICAL
+%endif
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_horizontal(uint8_t *src, int has_topleft, int has_topright,
+; int stride)
+;-----------------------------------------------------------------------------
+; 10-bit 8x8l horizontal: gather the left column, lowpass it (with topleft
+; when available), then broadcast each filtered pixel across its row.
+%macro PRED8x8L_HORIZONTAL 0
+cglobal pred8x8l_horizontal_10, 4, 4, 5
+    mova m0, [r0-16]
+    shr r1d, 14
+    dec r1
+    and r1, r3
+    sub r1, r3             ; -stride if has_topleft else 0
+    punpckhwd m0, [r0+r1-16]
+    mova m1, [r0+r3*2-16]
+    punpckhwd m1, [r0+r3*1-16]
+    lea r2, [r0+r3*4]
+    lea r1, [r3*3]
+    punpckhdq m1, m0
+    mova m2, [r2+r3*0-16]
+    punpckhwd m2, [r0+r1-16]
+    mova m3, [r2+r3*2-16]
+    punpckhwd m3, [r2+r3*1-16]
+    punpckhdq m3, m2
+    punpckhqdq m3, m1      ; 8 left pixels, l7..l0
+    PALIGNR m4, m3, [r2+r1-16], 14, m0
+    pslldq m0, m4, 2
+    pshuflw m0, m0, 11100101b
+    PRED4x4_LOWPASS m4, m3, m0, m4
+    punpckhwd m3, m4, m4
+    punpcklwd m4, m4
+    ; broadcast each filtered left pixel across its 8-pixel row
+    pshufd m0, m3, 0xff
+    pshufd m1, m3, 0xaa
+    pshufd m2, m3, 0x55
+    pshufd m3, m3, 0x00
+    mova [r0+r3*0], m0
+    mova [r0+r3*1], m1
+    mova [r0+r3*2], m2
+    mova [r0+r1*1], m3
+    pshufd m0, m4, 0xff
+    pshufd m1, m4, 0xaa
+    pshufd m2, m4, 0x55
+    pshufd m3, m4, 0x00
+    mova [r2+r3*0], m0
+    mova [r2+r3*1], m1
+    mova [r2+r3*2], m2
+    mova [r2+r1*1], m3
+    RET
+%endmacro
+
+INIT_XMM sse2
+PRED8x8L_HORIZONTAL
+INIT_XMM ssse3
+PRED8x8L_HORIZONTAL
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx
+PRED8x8L_HORIZONTAL
+%endif
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_down_left(pixel *src, int has_topleft, int has_topright,
+; int stride)
+;-----------------------------------------------------------------------------
+; 10-bit 8x8l down-left: filter the top row (m6) and, if present, the
+; top-right row (m1; otherwise broadcast top[7] in .fix_tr), then emit the
+; 8 diagonal rows by repeated PALIGNR/pslldq of the two filtered halves.
+%macro PRED8x8L_DOWN_LEFT 0
+cglobal pred8x8l_down_left_10, 4, 4, 7
+    sub r0, r3
+    mova m3, [r0]
+    shr r1d, 14
+    neg r1
+    shr r2d, 13            ; also sets ZF consumed by the jz below
+    pslldq m1, m3, 2
+    psrldq m2, m3, 2
+    pinsrw m1, [r0+r1], 0
+    pinsrw m2, [r0+r2+14], 7
+    PRED4x4_LOWPASS m6, m2, m1, m3
+    jz .fix_tr ; flags from shr r2d
+    mova m1, [r0+16]
+    psrldq m5, m1, 2
+    PALIGNR m2, m1, m3, 14, m3
+    pshufhw m5, m5, 10100100b  ; duplicate the last top-right pixel
+    PRED4x4_LOWPASS m1, m2, m5, m1
+.do_topright:
+    lea r1, [r3*3]
+    psrldq m5, m1, 14
+    lea r2, [r0+r3*4]
+    PALIGNR m2, m1, m6, 2, m0
+    PALIGNR m3, m1, m6, 14, m0
+    PALIGNR m5, m1, 2, m0
+    pslldq m4, m6, 2
+    PRED4x4_LOWPASS m6, m4, m2, m6
+    PRED4x4_LOWPASS m1, m3, m5, m1
+    ; write rows bottom-up, realigning the diagonal one pixel per row
+    mova [r2+r3*4], m1
+    PALIGNR m1, m6, 14, m2
+    pslldq m6, 2
+    mova [r2+r1*1], m1
+    PALIGNR m1, m6, 14, m2
+    pslldq m6, 2
+    mova [r2+r3*2], m1
+    PALIGNR m1, m6, 14, m2
+    pslldq m6, 2
+    mova [r2+r3*1], m1
+    PALIGNR m1, m6, 14, m2
+    pslldq m6, 2
+    mova [r0+r3*4], m1
+    PALIGNR m1, m6, 14, m2
+    pslldq m6, 2
+    mova [r0+r1*1], m1
+    PALIGNR m1, m6, 14, m2
+    pslldq m6, 2
+    mova [r0+r3*2], m1
+    PALIGNR m1, m6, 14, m6
+    mova [r0+r3*1], m1
+    RET
+.fix_tr:
+    ; no top-right: replicate top[7] across the whole second half
+    punpckhwd m3, m3
+    pshufd m1, m3, 0xFF
+    jmp .do_topright
+%endmacro
+
+INIT_XMM sse2
+PRED8x8L_DOWN_LEFT
+INIT_XMM ssse3
+PRED8x8L_DOWN_LEFT
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx
+PRED8x8L_DOWN_LEFT
+%endif
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_down_right(pixel *src, int has_topleft, int has_topright,
+; int stride)
+;-----------------------------------------------------------------------------
+; 10-bit 8x8l down-right: filter left column, topleft and top row, then emit
+; 8 rows by realigning the combined diagonal one pixel per row.
+%macro PRED8x8L_DOWN_RIGHT 0
+; standard forbids this when has_topleft is false
+; no need to check
+cglobal pred8x8l_down_right_10, 4, 5, 8
+    sub r0, r3
+    lea r4, [r0+r3*4]
+    lea r1, [r3*3]
+    mova m0, [r0+r3*1-16]
+    punpckhwd m0, [r0+r3*0-16]
+    mova m1, [r0+r1*1-16]
+    punpckhwd m1, [r0+r3*2-16]
+    punpckhdq m1, m0
+    mova m2, [r4+r3*1-16]
+    punpckhwd m2, [r4+r3*0-16]
+    mova m3, [r4+r1*1-16]
+    punpckhwd m3, [r4+r3*2-16]
+    punpckhdq m3, m2
+    punpckhqdq m3, m1      ; 8 left pixels
+    mova m0, [r4+r3*4-16]
+    mova m1, [r0]
+    PALIGNR m4, m3, m0, 14, m0
+    PALIGNR m1, m3, 2, m2
+    pslldq m0, m4, 2
+    pshuflw m0, m0, 11100101b
+    PRED4x4_LOWPASS m6, m1, m4, m3  ; filtered left edge
+    PRED4x4_LOWPASS m4, m3, m0, m4
+    mova m3, [r0]
+    shr r2d, 13
+    pslldq m1, m3, 2
+    psrldq m2, m3, 2
+    pinsrw m1, [r0-2], 0
+    pinsrw m2, [r0+r2+14], 7 ; topright or duplicated last top pixel
+    PRED4x4_LOWPASS m3, m2, m1, m3  ; filtered top row
+    PALIGNR m2, m3, m6, 2, m0
+    PALIGNR m5, m3, m6, 14, m0
+    psrldq m7, m3, 2
+    PRED4x4_LOWPASS m6, m4, m2, m6
+    PRED4x4_LOWPASS m3, m5, m7, m3
+    ; write rows, shifting filtered left pixels in from below
+    mova [r4+r3*4], m6
+    PALIGNR m3, m6, 14, m2
+    pslldq m6, 2
+    mova [r0+r3*1], m3
+    PALIGNR m3, m6, 14, m2
+    pslldq m6, 2
+    mova [r0+r3*2], m3
+    PALIGNR m3, m6, 14, m2
+    pslldq m6, 2
+    mova [r0+r1*1], m3
+    PALIGNR m3, m6, 14, m2
+    pslldq m6, 2
+    mova [r0+r3*4], m3
+    PALIGNR m3, m6, 14, m2
+    pslldq m6, 2
+    mova [r4+r3*1], m3
+    PALIGNR m3, m6, 14, m2
+    pslldq m6, 2
+    mova [r4+r3*2], m3
+    PALIGNR m3, m6, 14, m6
+    mova [r4+r1*1], m3
+    RET
+%endmacro
+
+INIT_XMM sse2
+PRED8x8L_DOWN_RIGHT
+INIT_XMM ssse3
+PRED8x8L_DOWN_RIGHT
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx
+PRED8x8L_DOWN_RIGHT
+%endif
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_vertical_right(pixel *src, int has_topleft,
+; int has_topright, int stride)
+;-----------------------------------------------------------------------------
+; 10-bit 8x8l vertical-right: filter left column (m3) and top row (m2);
+; even rows use pavgw, odd rows the lowpass, with filtered left pixels
+; shifted in via pslldq/PALIGNR for the lower rows.
+%macro PRED8x8L_VERTICAL_RIGHT 0
+; likewise with 8x8l_down_right
+cglobal pred8x8l_vertical_right_10, 4, 5, 7
+    sub r0, r3
+    lea r4, [r0+r3*4]
+    lea r1, [r3*3]
+    mova m0, [r0+r3*1-16]
+    punpckhwd m0, [r0+r3*0-16]
+    mova m1, [r0+r1*1-16]
+    punpckhwd m1, [r0+r3*2-16]
+    punpckhdq m1, m0
+    mova m2, [r4+r3*1-16]
+    punpckhwd m2, [r4+r3*0-16]
+    mova m3, [r4+r1*1-16]
+    punpckhwd m3, [r4+r3*2-16]
+    punpckhdq m3, m2
+    punpckhqdq m3, m1      ; 8 left pixels
+    mova m0, [r4+r3*4-16]
+    mova m1, [r0]
+    PALIGNR m4, m3, m0, 14, m0
+    PALIGNR m1, m3, 2, m2
+    PRED4x4_LOWPASS m3, m1, m4, m3  ; filtered left edge
+    mova m2, [r0]
+    shr r2d, 13
+    pslldq m1, m2, 2
+    psrldq m5, m2, 2
+    pinsrw m1, [r0-2], 0
+    pinsrw m5, [r0+r2+14], 7
+    PRED4x4_LOWPASS m2, m5, m1, m2  ; filtered top row
+    PALIGNR m6, m2, m3, 12, m1
+    PALIGNR m5, m2, m3, 14, m0
+    PRED4x4_LOWPASS m0, m6, m2, m5  ; odd rows
+    pavgw m2, m5                    ; even rows
+    mova [r0+r3*2], m0
+    mova [r0+r3*1], m2
+    pslldq m6, m3, 4
+    pslldq m1, m3, 2
+    PRED4x4_LOWPASS m1, m3, m6, m1  ; filtered left pixels to shift in
+    PALIGNR m2, m1, 14, m4
+    mova [r0+r1*1], m2
+    pslldq m1, 2
+    PALIGNR m0, m1, 14, m3
+    mova [r0+r3*4], m0
+    pslldq m1, 2
+    PALIGNR m2, m1, 14, m4
+    mova [r4+r3*1], m2
+    pslldq m1, 2
+    PALIGNR m0, m1, 14, m3
+    mova [r4+r3*2], m0
+    pslldq m1, 2
+    PALIGNR m2, m1, 14, m4
+    mova [r4+r1*1], m2
+    pslldq m1, 2
+    PALIGNR m0, m1, 14, m1
+    mova [r4+r3*4], m0
+    RET
+%endmacro
+
+INIT_XMM sse2
+PRED8x8L_VERTICAL_RIGHT
+INIT_XMM ssse3
+PRED8x8L_VERTICAL_RIGHT
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx
+PRED8x8L_VERTICAL_RIGHT
+%endif
+
+;-----------------------------------------------------------------------------
+; void ff_pred8x8l_horizontal_up(pixel *src, int has_topleft,
+; int has_topright, int stride)
+;-----------------------------------------------------------------------------
+; Horizontal-up 8x8 luma intra prediction, 10-bit samples.
+; Works entirely from the 8 left-edge pixels (plus the top-left when
+; available): filters them, then emits rows as shifted/averaged pairs,
+; replicating the final pixel toward the bottom-right.
+%macro PRED8x8L_HORIZONTAL_UP 0
+cglobal pred8x8l_horizontal_up_10, 4, 4, 6
+ mova m0, [r0+r3*0-16]
+ punpckhwd m0, [r0+r3*1-16]
+; decode has_topleft (r1, high-bit encoded): shr/dec/and/sub yields either
+; -stride (use the row above) or 0 (replicate row 0) as the offset for the
+; neighbour loaded into m4
+ shr r1d, 14
+ dec r1
+ and r1, r3
+ sub r1, r3
+ mova m4, [r0+r1*1-16]
+ lea r1, [r3*3]
+ lea r2, [r0+r3*4]
+; gather the last word (left-edge pixel) of each of the 8 left rows into m0
+ mova m1, [r0+r3*2-16]
+ punpckhwd m1, [r0+r1*1-16]
+ punpckhdq m0, m1
+ mova m2, [r2+r3*0-16]
+ punpckhwd m2, [r2+r3*1-16]
+ mova m3, [r2+r3*2-16]
+ punpckhwd m3, [r2+r1*1-16]
+ punpckhdq m2, m3
+ punpckhqdq m0, m2
+; lowpass-filter the packed edge (pshufhw masks replicate the last pixel)
+ PALIGNR m1, m0, m4, 14, m4
+ psrldq m2, m0, 2
+ pshufhw m2, m2, 10100100b
+ PRED4x4_LOWPASS m0, m1, m2, m0
+ psrldq m1, m0, 2
+ psrldq m2, m0, 4
+ pshufhw m1, m1, 10100100b
+ pshufhw m2, m2, 01010100b
+; interleave the pavg and lowpass results -> horizontal-up output sequence
+ pavgw m4, m0, m1
+ PRED4x4_LOWPASS m1, m2, m0, m1
+ punpckhwd m5, m4, m1
+ punpcklwd m4, m1
+ mova [r2+r3*0], m5
+ mova [r0+r3*0], m4
+; lower rows repeat the tail; pshufd masks keep replicating the last pixel
+ pshufd m0, m5, 11111001b
+ pshufd m1, m5, 11111110b
+ pshufd m2, m5, 11111111b
+ mova [r2+r3*1], m0
+ mova [r2+r3*2], m1
+ mova [r2+r1*1], m2
+ PALIGNR m2, m5, m4, 4, m0
+ PALIGNR m3, m5, m4, 8, m1
+ PALIGNR m5, m5, m4, 12, m4
+ mova [r0+r3*1], m2
+ mova [r0+r3*2], m3
+ mova [r0+r1*1], m5
+ RET
+%endmacro
+
+INIT_XMM sse2
+PRED8x8L_HORIZONTAL_UP
+INIT_XMM ssse3
+PRED8x8L_HORIZONTAL_UP
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx
+PRED8x8L_HORIZONTAL_UP
+%endif
+
+
+;-----------------------------------------------------------------------------
+; void ff_pred16x16_vertical(pixel *src, int stride)
+;-----------------------------------------------------------------------------
+; Store one full 16-pixel (32-byte at 10-bit) row: two stores with XMM
+; registers, four with 8-byte MMX registers (the optional %4/%5 arguments).
+%macro MOV16 3-5
+ mova [%1+ 0], %2
+ mova [%1+mmsize], %3
+%if mmsize==8
+ mova [%1+ 16], %4
+ mova [%1+ 24], %5
+%endif
+%endmacro
+
+; Vertical 16x16: load the row above the block once and replicate it into
+; all 16 output rows, two rows per loop iteration.
+%macro PRED16x16_VERTICAL 0
+cglobal pred16x16_vertical_10, 2, 3
+ sub r0, r1
+ mov r2d, 8
+ mova m0, [r0+ 0]
+ mova m1, [r0+mmsize]
+%if mmsize==8
+ mova m2, [r0+16]
+ mova m3, [r0+24]
+%endif
+.loop:
+ MOV16 r0+r1*1, m0, m1, m2, m3
+ MOV16 r0+r1*2, m0, m1, m2, m3
+ lea r0, [r0+r1*2]
+ dec r2d
+ jg .loop
+ REP_RET
+%endmacro
+
+INIT_MMX mmxext
+PRED16x16_VERTICAL
+INIT_XMM sse2
+PRED16x16_VERTICAL
+
+;-----------------------------------------------------------------------------
+; void ff_pred16x16_horizontal(pixel *src, int stride)
+;-----------------------------------------------------------------------------
+; Horizontal 16x16: fill each output row with its own left-neighbour pixel,
+; two rows per loop iteration.
+%macro PRED16x16_HORIZONTAL 0
+cglobal pred16x16_horizontal_10, 2, 3
+ mov r2d, 8
+.vloop:
+; load the two pixels left of each row; word index 1 is the adjacent one,
+; which SPLATW broadcasts across the whole register
+ movd m0, [r0+r1*0-4]
+ movd m1, [r0+r1*1-4]
+ SPLATW m0, m0, 1
+ SPLATW m1, m1, 1
+ MOV16 r0+r1*0, m0, m0, m0, m0
+ MOV16 r0+r1*1, m1, m1, m1, m1
+ lea r0, [r0+r1*2]
+ dec r2d
+ jg .vloop
+ REP_RET
+%endmacro
+
+INIT_MMX mmxext
+PRED16x16_HORIZONTAL
+INIT_XMM sse2
+PRED16x16_HORIZONTAL
+
+;-----------------------------------------------------------------------------
+; void ff_pred16x16_dc(pixel *src, int stride)
+;-----------------------------------------------------------------------------
+; Full DC 16x16: dc = (sum of the 16 top + 16 left pixels + 16) >> 5,
+; broadcast to all 256 output pixels.
+%macro PRED16x16_DC 0
+cglobal pred16x16_dc_10, 2, 6
+ mov r5, r0
+; sum the 16 pixels of the row above
+ sub r0, r1
+ mova m0, [r0+0]
+ paddw m0, [r0+mmsize]
+%if mmsize==8
+ paddw m0, [r0+16]
+ paddw m0, [r0+24]
+%endif
+ HADDW m0, m2
+
+; sum the 16 left-column pixels, two rows per unrolled step
+ lea r0, [r0+r1-2]
+ movzx r3d, word [r0]
+ movzx r4d, word [r0+r1]
+%rep 7
+ lea r0, [r0+r1*2]
+ movzx r2d, word [r0]
+ add r3d, r2d
+ movzx r2d, word [r0+r1]
+ add r4d, r2d
+%endrep
+; +16 for rounding; the combined sum is divided by 32 below
+ lea r3d, [r3+r4+16]
+
+ movd m1, r3d
+ paddw m0, m1
+ psrlw m0, 5
+ SPLATW m0, m0
+ mov r3d, 8
+.loop:
+ MOV16 r5+r1*0, m0, m0, m0, m0
+ MOV16 r5+r1*1, m0, m0, m0, m0
+ lea r5, [r5+r1*2]
+ dec r3d
+ jg .loop
+ REP_RET
+%endmacro
+
+INIT_MMX mmxext
+PRED16x16_DC
+INIT_XMM sse2
+PRED16x16_DC
+
+;-----------------------------------------------------------------------------
+; void ff_pred16x16_top_dc(pixel *src, int stride)
+;-----------------------------------------------------------------------------
+; Top DC 16x16: dc = (sum of the 16 top pixels + 8) >> 4, broadcast to the
+; whole block (used when only the row above is available).
+%macro PRED16x16_TOP_DC 0
+cglobal pred16x16_top_dc_10, 2, 3
+ sub r0, r1
+ mova m0, [r0+0]
+ paddw m0, [r0+mmsize]
+%if mmsize==8
+ paddw m0, [r0+16]
+ paddw m0, [r0+24]
+%endif
+ HADDW m0, m2
+
+; splat first, then round (+8) and divide by 16 lane-wise
+ SPLATW m0, m0
+ paddw m0, [pw_8]
+ psrlw m0, 4
+ mov r2d, 8
+.loop:
+ MOV16 r0+r1*1, m0, m0, m0, m0
+ MOV16 r0+r1*2, m0, m0, m0, m0
+ lea r0, [r0+r1*2]
+ dec r2d
+ jg .loop
+ REP_RET
+%endmacro
+
+INIT_MMX mmxext
+PRED16x16_TOP_DC
+INIT_XMM sse2
+PRED16x16_TOP_DC
+
+;-----------------------------------------------------------------------------
+; void ff_pred16x16_left_dc(pixel *src, int stride)
+;-----------------------------------------------------------------------------
+; Left DC 16x16: dc = (sum of the 16 left-column pixels + 8) >> 4, broadcast
+; to the whole block (used when only the left edge is available).
+%macro PRED16x16_LEFT_DC 0
+cglobal pred16x16_left_dc_10, 2, 6
+ mov r5, r0
+
+; sum the 16 left pixels, two rows per unrolled step
+ sub r0, 2
+ movzx r3d, word [r0]
+ movzx r4d, word [r0+r1]
+%rep 7
+ lea r0, [r0+r1*2]
+ movzx r2d, word [r0]
+ add r3d, r2d
+ movzx r2d, word [r0+r1]
+ add r4d, r2d
+%endrep
+ lea r3d, [r3+r4+8]
+ shr r3d, 4
+
+ movd m0, r3d
+ SPLATW m0, m0
+ mov r3d, 8
+.loop:
+ MOV16 r5+r1*0, m0, m0, m0, m0
+ MOV16 r5+r1*1, m0, m0, m0, m0
+ lea r5, [r5+r1*2]
+ dec r3d
+ jg .loop
+ REP_RET
+%endmacro
+
+INIT_MMX mmxext
+PRED16x16_LEFT_DC
+INIT_XMM sse2
+PRED16x16_LEFT_DC
+
+;-----------------------------------------------------------------------------
+; void ff_pred16x16_128_dc(pixel *src, int stride)
+;-----------------------------------------------------------------------------
+; Fixed DC 16x16: no neighbours available — fill the block with 512, the
+; mid-grey value for 10-bit samples (1 << 9).
+%macro PRED16x16_128_DC 0
+cglobal pred16x16_128_dc_10, 2,3
+ mova m0, [pw_512]
+ mov r2d, 8
+.loop:
+ MOV16 r0+r1*0, m0, m0, m0, m0
+ MOV16 r0+r1*1, m0, m0, m0, m0
+ lea r0, [r0+r1*2]
+ dec r2d
+ jg .loop
+ REP_RET
+%endmacro
+
+INIT_MMX mmxext
+PRED16x16_128_DC
+INIT_XMM sse2
+PRED16x16_128_DC
diff --git a/media/ffvpx/libavcodec/x86/h264_intrapred_init.c b/media/ffvpx/libavcodec/x86/h264_intrapred_init.c
new file mode 100644
index 000000000..528b92e49
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/h264_intrapred_init.c
@@ -0,0 +1,403 @@
+/*
+ * Copyright (c) 2010 Fiona Glaser <fiona@x264.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/h264pred.h"
+
+#define PRED4x4(TYPE, DEPTH, OPT) \
+void ff_pred4x4_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
+ const uint8_t *topright, \
+ ptrdiff_t stride);
+
+PRED4x4(dc, 10, mmxext)
+PRED4x4(down_left, 10, sse2)
+PRED4x4(down_left, 10, avx)
+PRED4x4(down_right, 10, sse2)
+PRED4x4(down_right, 10, ssse3)
+PRED4x4(down_right, 10, avx)
+PRED4x4(vertical_left, 10, sse2)
+PRED4x4(vertical_left, 10, avx)
+PRED4x4(vertical_right, 10, sse2)
+PRED4x4(vertical_right, 10, ssse3)
+PRED4x4(vertical_right, 10, avx)
+PRED4x4(horizontal_up, 10, mmxext)
+PRED4x4(horizontal_down, 10, sse2)
+PRED4x4(horizontal_down, 10, ssse3)
+PRED4x4(horizontal_down, 10, avx)
+
+#define PRED8x8(TYPE, DEPTH, OPT) \
+void ff_pred8x8_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
+ ptrdiff_t stride);
+
+PRED8x8(dc, 10, mmxext)
+PRED8x8(dc, 10, sse2)
+PRED8x8(top_dc, 10, sse2)
+PRED8x8(plane, 10, sse2)
+PRED8x8(vertical, 10, sse2)
+PRED8x8(horizontal, 10, sse2)
+
+#define PRED8x8L(TYPE, DEPTH, OPT)\
+void ff_pred8x8l_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
+ int has_topleft, \
+ int has_topright, \
+ ptrdiff_t stride);
+
+PRED8x8L(dc, 10, sse2)
+PRED8x8L(dc, 10, avx)
+PRED8x8L(128_dc, 10, mmxext)
+PRED8x8L(128_dc, 10, sse2)
+PRED8x8L(top_dc, 10, sse2)
+PRED8x8L(top_dc, 10, avx)
+PRED8x8L(vertical, 10, sse2)
+PRED8x8L(vertical, 10, avx)
+PRED8x8L(horizontal, 10, sse2)
+PRED8x8L(horizontal, 10, ssse3)
+PRED8x8L(horizontal, 10, avx)
+PRED8x8L(down_left, 10, sse2)
+PRED8x8L(down_left, 10, ssse3)
+PRED8x8L(down_left, 10, avx)
+PRED8x8L(down_right, 10, sse2)
+PRED8x8L(down_right, 10, ssse3)
+PRED8x8L(down_right, 10, avx)
+PRED8x8L(vertical_right, 10, sse2)
+PRED8x8L(vertical_right, 10, ssse3)
+PRED8x8L(vertical_right, 10, avx)
+PRED8x8L(horizontal_up, 10, sse2)
+PRED8x8L(horizontal_up, 10, ssse3)
+PRED8x8L(horizontal_up, 10, avx)
+
+#define PRED16x16(TYPE, DEPTH, OPT)\
+void ff_pred16x16_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
+ ptrdiff_t stride);
+
+PRED16x16(dc, 10, mmxext)
+PRED16x16(dc, 10, sse2)
+PRED16x16(top_dc, 10, mmxext)
+PRED16x16(top_dc, 10, sse2)
+PRED16x16(128_dc, 10, mmxext)
+PRED16x16(128_dc, 10, sse2)
+PRED16x16(left_dc, 10, mmxext)
+PRED16x16(left_dc, 10, sse2)
+PRED16x16(vertical, 10, mmxext)
+PRED16x16(vertical, 10, sse2)
+PRED16x16(horizontal, 10, mmxext)
+PRED16x16(horizontal, 10, sse2)
+
+/* 8-bit versions */
+PRED16x16(vertical, 8, mmx)
+PRED16x16(vertical, 8, sse)
+PRED16x16(horizontal, 8, mmx)
+PRED16x16(horizontal, 8, mmxext)
+PRED16x16(horizontal, 8, ssse3)
+PRED16x16(dc, 8, mmxext)
+PRED16x16(dc, 8, sse2)
+PRED16x16(dc, 8, ssse3)
+PRED16x16(plane_h264, 8, mmx)
+PRED16x16(plane_h264, 8, mmxext)
+PRED16x16(plane_h264, 8, sse2)
+PRED16x16(plane_h264, 8, ssse3)
+PRED16x16(plane_rv40, 8, mmx)
+PRED16x16(plane_rv40, 8, mmxext)
+PRED16x16(plane_rv40, 8, sse2)
+PRED16x16(plane_rv40, 8, ssse3)
+PRED16x16(plane_svq3, 8, mmx)
+PRED16x16(plane_svq3, 8, mmxext)
+PRED16x16(plane_svq3, 8, sse2)
+PRED16x16(plane_svq3, 8, ssse3)
+PRED16x16(tm_vp8, 8, mmx)
+PRED16x16(tm_vp8, 8, mmxext)
+PRED16x16(tm_vp8, 8, sse2)
+
+PRED8x8(top_dc, 8, mmxext)
+PRED8x8(dc_rv40, 8, mmxext)
+PRED8x8(dc, 8, mmxext)
+PRED8x8(vertical, 8, mmx)
+PRED8x8(horizontal, 8, mmx)
+PRED8x8(horizontal, 8, mmxext)
+PRED8x8(horizontal, 8, ssse3)
+PRED8x8(plane, 8, mmx)
+PRED8x8(plane, 8, mmxext)
+PRED8x8(plane, 8, sse2)
+PRED8x8(plane, 8, ssse3)
+PRED8x8(tm_vp8, 8, mmx)
+PRED8x8(tm_vp8, 8, mmxext)
+PRED8x8(tm_vp8, 8, sse2)
+PRED8x8(tm_vp8, 8, ssse3)
+
+PRED8x8L(top_dc, 8, mmxext)
+PRED8x8L(top_dc, 8, ssse3)
+PRED8x8L(dc, 8, mmxext)
+PRED8x8L(dc, 8, ssse3)
+PRED8x8L(horizontal, 8, mmxext)
+PRED8x8L(horizontal, 8, ssse3)
+PRED8x8L(vertical, 8, mmxext)
+PRED8x8L(vertical, 8, ssse3)
+PRED8x8L(down_left, 8, mmxext)
+PRED8x8L(down_left, 8, sse2)
+PRED8x8L(down_left, 8, ssse3)
+PRED8x8L(down_right, 8, mmxext)
+PRED8x8L(down_right, 8, sse2)
+PRED8x8L(down_right, 8, ssse3)
+PRED8x8L(vertical_right, 8, mmxext)
+PRED8x8L(vertical_right, 8, sse2)
+PRED8x8L(vertical_right, 8, ssse3)
+PRED8x8L(vertical_left, 8, sse2)
+PRED8x8L(vertical_left, 8, ssse3)
+PRED8x8L(horizontal_up, 8, mmxext)
+PRED8x8L(horizontal_up, 8, ssse3)
+PRED8x8L(horizontal_down, 8, mmxext)
+PRED8x8L(horizontal_down, 8, sse2)
+PRED8x8L(horizontal_down, 8, ssse3)
+
+PRED4x4(dc, 8, mmxext)
+PRED4x4(down_left, 8, mmxext)
+PRED4x4(down_right, 8, mmxext)
+PRED4x4(vertical_left, 8, mmxext)
+PRED4x4(vertical_right, 8, mmxext)
+PRED4x4(horizontal_up, 8, mmxext)
+PRED4x4(horizontal_down, 8, mmxext)
+PRED4x4(tm_vp8, 8, mmx)
+PRED4x4(tm_vp8, 8, mmxext)
+PRED4x4(tm_vp8, 8, ssse3)
+PRED4x4(vertical_vp8, 8, mmxext)
+
+/**
+ * Install the x86-optimized H.264 intra prediction functions.
+ *
+ * Fills the H264PredContext function-pointer tables with the fastest
+ * implementation the host CPU supports.  ISA tiers are applied in
+ * ascending order (MMX, MMXEXT, SSE, SSE2, SSSE3, AVX); each tier
+ * overwrites the pointers it improves on, so the best supported version
+ * wins.  Only 8- and 10-bit depths have assembly here; other depths keep
+ * the C defaults.
+ *
+ * @param h                 prediction context whose tables are filled
+ * @param codec_id          VP7/VP8, SVQ3 and RV40 get codec-specific
+ *                          plane/TrueMotion predictors
+ * @param bit_depth         sample bit depth (8 or 10 accelerated)
+ * @param chroma_format_idc <= 1 selects the 8x8-chroma (4:2:0) variants
+ */
+av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
+ const int bit_depth,
+ const int chroma_format_idc)
+{
+ int cpu_flags = av_get_cpu_flags();
+
+ if (bit_depth == 8) {
+ if (EXTERNAL_MMX(cpu_flags)) {
+ h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_8_mmx;
+ h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_8_mmx;
+ if (chroma_format_idc <= 1) {
+ h->pred8x8 [VERT_PRED8x8 ] = ff_pred8x8_vertical_8_mmx;
+ h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_8_mmx;
+ }
+ /* VP7/VP8 replace the plane predictors with TrueMotion variants */
+ if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) {
+ h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_8_mmx;
+ h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_8_mmx;
+ h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_8_mmx;
+ } else {
+ if (chroma_format_idc <= 1)
+ h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_8_mmx;
+ if (codec_id == AV_CODEC_ID_SVQ3) {
+ /* the SVQ3 plane MMX code requires cmov */
+ if (cpu_flags & AV_CPU_FLAG_CMOV)
+ h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_8_mmx;
+ } else if (codec_id == AV_CODEC_ID_RV40) {
+ h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_8_mmx;
+ } else {
+ h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_8_mmx;
+ }
+ }
+ }
+
+ if (EXTERNAL_MMXEXT(cpu_flags)) {
+ h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_8_mmxext;
+ h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_8_mmxext;
+ if (chroma_format_idc <= 1)
+ h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_horizontal_8_mmxext;
+ h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_8_mmxext;
+ h->pred8x8l [DC_PRED ] = ff_pred8x8l_dc_8_mmxext;
+ h->pred8x8l [HOR_PRED ] = ff_pred8x8l_horizontal_8_mmxext;
+ h->pred8x8l [VERT_PRED ] = ff_pred8x8l_vertical_8_mmxext;
+ h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_8_mmxext;
+ h->pred8x8l [VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_8_mmxext;
+ h->pred8x8l [HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_8_mmxext;
+ h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_8_mmxext;
+ h->pred8x8l [HOR_DOWN_PRED ] = ff_pred8x8l_horizontal_down_8_mmxext;
+ h->pred4x4 [DIAG_DOWN_RIGHT_PRED ] = ff_pred4x4_down_right_8_mmxext;
+ h->pred4x4 [VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_8_mmxext;
+ h->pred4x4 [HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_8_mmxext;
+ h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_8_mmxext;
+ /* some 4x4 modes have different neighbour rules per codec */
+ if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8 ||
+ codec_id == AV_CODEC_ID_H264) {
+ h->pred4x4 [DIAG_DOWN_LEFT_PRED] = ff_pred4x4_down_left_8_mmxext;
+ }
+ if (codec_id == AV_CODEC_ID_SVQ3 || codec_id == AV_CODEC_ID_H264) {
+ h->pred4x4 [VERT_LEFT_PRED ] = ff_pred4x4_vertical_left_8_mmxext;
+ }
+ if (codec_id != AV_CODEC_ID_RV40) {
+ h->pred4x4 [HOR_UP_PRED ] = ff_pred4x4_horizontal_up_8_mmxext;
+ }
+ if (codec_id == AV_CODEC_ID_SVQ3 || codec_id == AV_CODEC_ID_H264) {
+ if (chroma_format_idc <= 1) {
+ h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_8_mmxext;
+ h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_8_mmxext;
+ }
+ }
+ if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) {
+ h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_8_mmxext;
+ h->pred8x8 [DC_PRED8x8 ] = ff_pred8x8_dc_rv40_8_mmxext;
+ h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_8_mmxext;
+ h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_8_mmxext;
+ h->pred4x4 [VERT_PRED ] = ff_pred4x4_vertical_vp8_8_mmxext;
+ } else {
+ if (chroma_format_idc <= 1)
+ h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_8_mmxext;
+ if (codec_id == AV_CODEC_ID_SVQ3) {
+ h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_svq3_8_mmxext;
+ } else if (codec_id == AV_CODEC_ID_RV40) {
+ h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_rv40_8_mmxext;
+ } else {
+ h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_h264_8_mmxext;
+ }
+ }
+ }
+
+ if (EXTERNAL_SSE(cpu_flags)) {
+ h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_8_sse;
+ }
+
+ if (EXTERNAL_SSE2(cpu_flags)) {
+ h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_8_sse2;
+ h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_8_sse2;
+ h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_8_sse2;
+ h->pred8x8l [VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_8_sse2;
+ h->pred8x8l [VERT_LEFT_PRED ] = ff_pred8x8l_vertical_left_8_sse2;
+ h->pred8x8l [HOR_DOWN_PRED ] = ff_pred8x8l_horizontal_down_8_sse2;
+ if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) {
+ h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_8_sse2;
+ h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_8_sse2;
+ } else {
+ if (chroma_format_idc <= 1)
+ h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_8_sse2;
+ if (codec_id == AV_CODEC_ID_SVQ3) {
+ h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_8_sse2;
+ } else if (codec_id == AV_CODEC_ID_RV40) {
+ h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_8_sse2;
+ } else {
+ h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_8_sse2;
+ }
+ }
+ }
+
+ if (EXTERNAL_SSSE3(cpu_flags)) {
+ h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_8_ssse3;
+ h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_8_ssse3;
+ if (chroma_format_idc <= 1)
+ h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_8_ssse3;
+ h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_8_ssse3;
+ h->pred8x8l [DC_PRED ] = ff_pred8x8l_dc_8_ssse3;
+ h->pred8x8l [HOR_PRED ] = ff_pred8x8l_horizontal_8_ssse3;
+ h->pred8x8l [VERT_PRED ] = ff_pred8x8l_vertical_8_ssse3;
+ h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_8_ssse3;
+ h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_8_ssse3;
+ h->pred8x8l [VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_8_ssse3;
+ h->pred8x8l [VERT_LEFT_PRED ] = ff_pred8x8l_vertical_left_8_ssse3;
+ h->pred8x8l [HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_8_ssse3;
+ h->pred8x8l [HOR_DOWN_PRED ] = ff_pred8x8l_horizontal_down_8_ssse3;
+ if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) {
+ h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_8_ssse3;
+ h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_8_ssse3;
+ } else {
+ if (chroma_format_idc <= 1)
+ h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_8_ssse3;
+ if (codec_id == AV_CODEC_ID_SVQ3) {
+ h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_8_ssse3;
+ } else if (codec_id == AV_CODEC_ID_RV40) {
+ h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_8_ssse3;
+ } else {
+ h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_8_ssse3;
+ }
+ }
+ }
+ } else if (bit_depth == 10) {
+ if (EXTERNAL_MMXEXT(cpu_flags)) {
+ h->pred4x4[DC_PRED ] = ff_pred4x4_dc_10_mmxext;
+ h->pred4x4[HOR_UP_PRED ] = ff_pred4x4_horizontal_up_10_mmxext;
+
+ if (chroma_format_idc <= 1)
+ h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_10_mmxext;
+
+ h->pred8x8l[DC_128_PRED ] = ff_pred8x8l_128_dc_10_mmxext;
+
+ h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_10_mmxext;
+ h->pred16x16[TOP_DC_PRED8x8 ] = ff_pred16x16_top_dc_10_mmxext;
+ h->pred16x16[DC_128_PRED8x8 ] = ff_pred16x16_128_dc_10_mmxext;
+ h->pred16x16[LEFT_DC_PRED8x8 ] = ff_pred16x16_left_dc_10_mmxext;
+ h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_10_mmxext;
+ h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_10_mmxext;
+ }
+ if (EXTERNAL_SSE2(cpu_flags)) {
+ h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_sse2;
+ h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_sse2;
+ h->pred4x4[VERT_LEFT_PRED ] = ff_pred4x4_vertical_left_10_sse2;
+ h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_sse2;
+ h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_sse2;
+
+ if (chroma_format_idc <= 1) {
+ h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_10_sse2;
+ h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_10_sse2;
+ h->pred8x8[PLANE_PRED8x8 ] = ff_pred8x8_plane_10_sse2;
+ h->pred8x8[VERT_PRED8x8 ] = ff_pred8x8_vertical_10_sse2;
+ h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_horizontal_10_sse2;
+ }
+
+ h->pred8x8l[VERT_PRED ] = ff_pred8x8l_vertical_10_sse2;
+ h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_10_sse2;
+ h->pred8x8l[DC_PRED ] = ff_pred8x8l_dc_10_sse2;
+ h->pred8x8l[DC_128_PRED ] = ff_pred8x8l_128_dc_10_sse2;
+ h->pred8x8l[TOP_DC_PRED ] = ff_pred8x8l_top_dc_10_sse2;
+ h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_sse2;
+ h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_10_sse2;
+ h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_10_sse2;
+ h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_10_sse2;
+
+ h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_10_sse2;
+ h->pred16x16[TOP_DC_PRED8x8 ] = ff_pred16x16_top_dc_10_sse2;
+ h->pred16x16[DC_128_PRED8x8 ] = ff_pred16x16_128_dc_10_sse2;
+ h->pred16x16[LEFT_DC_PRED8x8 ] = ff_pred16x16_left_dc_10_sse2;
+ h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_10_sse2;
+ h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_10_sse2;
+ }
+ if (EXTERNAL_SSSE3(cpu_flags)) {
+ h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_ssse3;
+ h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_ssse3;
+ h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_ssse3;
+
+ h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_10_ssse3;
+ h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_ssse3;
+ h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_10_ssse3;
+ h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_10_ssse3;
+ h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_10_ssse3;
+ }
+ if (EXTERNAL_AVX(cpu_flags)) {
+ h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_avx;
+ h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_avx;
+ h->pred4x4[VERT_LEFT_PRED ] = ff_pred4x4_vertical_left_10_avx;
+ h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_avx;
+ h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_avx;
+
+ h->pred8x8l[VERT_PRED ] = ff_pred8x8l_vertical_10_avx;
+ h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_10_avx;
+ h->pred8x8l[DC_PRED ] = ff_pred8x8l_dc_10_avx;
+ h->pred8x8l[TOP_DC_PRED ] = ff_pred8x8l_top_dc_10_avx;
+ h->pred8x8l[DIAG_DOWN_RIGHT_PRED] = ff_pred8x8l_down_right_10_avx;
+ h->pred8x8l[DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_10_avx;
+ h->pred8x8l[VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_10_avx;
+ h->pred8x8l[HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_10_avx;
+ }
+ }
+}
diff --git a/media/ffvpx/libavcodec/x86/mathops.h b/media/ffvpx/libavcodec/x86/mathops.h
new file mode 100644
index 000000000..6298f5ed1
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/mathops.h
@@ -0,0 +1,133 @@
+/*
+ * simple math operations
+ * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_X86_MATHOPS_H
+#define AVCODEC_X86_MATHOPS_H
+
+#include "config.h"
+
+#include "libavutil/common.h"
+#include "libavutil/x86/asm.h"
+
+#if HAVE_INLINE_ASM
+
+#if ARCH_X86_32
+
+/* MULL: (int)(((int64_t)a * b) >> shift) without a 64-bit temporary —
+ * imull leaves the full signed product in edx:eax, shrdl then shifts the
+ * wanted 32 bits of the result straight out of that register pair. */
+#define MULL MULL
+static av_always_inline av_const int MULL(int a, int b, unsigned shift)
+{
+ int rt, dummy;
+ __asm__ (
+ "imull %3 \n\t"
+ "shrdl %4, %%edx, %%eax \n\t"
+ :"=a"(rt), "=d"(dummy)
+ :"a"(a), "rm"(b), "ci"((uint8_t)shift)
+ );
+ return rt;
+}
+
+/* MULH: high 32 bits of the signed 64-bit product a*b (edx after imull). */
+#define MULH MULH
+static av_always_inline av_const int MULH(int a, int b)
+{
+ int rt, dummy;
+ __asm__ (
+ "imull %3"
+ :"=d"(rt), "=a"(dummy)
+ :"a"(a), "rm"(b)
+ );
+ return rt;
+}
+
+/* MUL64: full signed 64-bit product of two 32-bit ints; the "=A"
+ * constraint binds the edx:eax pair that imull writes. */
+#define MUL64 MUL64
+static av_always_inline av_const int64_t MUL64(int a, int b)
+{
+ int64_t rt;
+ __asm__ (
+ "imull %2"
+ :"=A"(rt)
+ :"a"(a), "rm"(b)
+ );
+ return rt;
+}
+
+#endif /* ARCH_X86_32 */
+
+#if HAVE_I686
+/* Branchless median of three via i686 conditional moves: returns the value
+ * of a, b, c that is neither the largest nor the smallest. */
+#define mid_pred mid_pred
+static inline av_const int mid_pred(int a, int b, int c)
+{
+ int i=b;
+ __asm__ (
+ "cmp %2, %1 \n\t"
+ "cmovg %1, %0 \n\t"
+ "cmovg %2, %1 \n\t"
+ "cmp %3, %1 \n\t"
+ "cmovl %3, %1 \n\t"
+ "cmp %1, %0 \n\t"
+ "cmovg %1, %0 \n\t"
+ :"+&r"(i), "+&r"(a)
+ :"r"(b), "r"(c)
+ );
+ return i;
+}
+
+#if HAVE_6REGS
+/* COPY3_IF_LT: if (y < x) { x = y; a = b; c = d; } — one compare feeding
+ * three conditional moves, so all three copies share the same flags. */
+#define COPY3_IF_LT(x, y, a, b, c, d)\
+__asm__ volatile(\
+ "cmpl %0, %3 \n\t"\
+ "cmovl %3, %0 \n\t"\
+ "cmovl %4, %1 \n\t"\
+ "cmovl %5, %2 \n\t"\
+ : "+&r" (x), "+&r" (a), "+r" (c)\
+ : "r" (y), "r" (b), "r" (d)\
+);
+#endif /* HAVE_6REGS */
+
+#endif /* HAVE_I686 */
+
+/* MASK_ABS: mask = level >> 31 (all-ones when negative), level = |level|,
+ * using the classic cdq/xor/sub sequence; level is pinned to eax and the
+ * sign mask comes out in edx. */
+#define MASK_ABS(mask, level) \
+ __asm__ ("cdq \n\t" \
+ "xorl %1, %0 \n\t" \
+ "subl %1, %0 \n\t" \
+ : "+a"(level), "=&d"(mask))
+
+// NEG_SSR32: arithmetic shift right by (32 - s).  x86 masks shift counts
+// mod 32, so shifting by (uint8_t)-s is equivalent and avoids computing
+// +32 explicitly (gcc should do that ...).
+#define NEG_SSR32 NEG_SSR32
+static inline int32_t NEG_SSR32( int32_t a, int8_t s){
+ __asm__ ("sarl %1, %0\n\t"
+ : "+r" (a)
+ : "ic" ((uint8_t)(-s))
+ );
+ return a;
+}
+
+/* NEG_USR32: logical (unsigned) shift right by (32 - s); same mod-32
+ * shift-count masking trick as NEG_SSR32 above. */
+#define NEG_USR32 NEG_USR32
+static inline uint32_t NEG_USR32(uint32_t a, int8_t s){
+ __asm__ ("shrl %1, %0\n\t"
+ : "+r" (a)
+ : "ic" ((uint8_t)(-s))
+ );
+ return a;
+}
+
+#endif /* HAVE_INLINE_ASM */
+#endif /* AVCODEC_X86_MATHOPS_H */
diff --git a/media/ffvpx/libavcodec/x86/moz.build b/media/ffvpx/libavcodec/x86/moz.build
new file mode 100644
index 000000000..3f0740df1
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/moz.build
@@ -0,0 +1,35 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+SOURCES += [
+ 'constants.c',
+ 'flacdsp.asm',
+ 'flacdsp_init.c',
+ 'h264_intrapred.asm',
+ 'h264_intrapred_10bit.asm',
+ 'h264_intrapred_init.c',
+ 'videodsp.asm',
+ 'videodsp_init.c',
+ 'vp8dsp.asm',
+ 'vp8dsp_init.c',
+ 'vp8dsp_loopfilter.asm',
+ 'vp9dsp_init.c',
+ 'vp9dsp_init_10bpp.c',
+ 'vp9dsp_init_12bpp.c',
+ 'vp9dsp_init_16bpp.c',
+ 'vp9intrapred.asm',
+ 'vp9intrapred_16bpp.asm',
+ 'vp9itxfm.asm',
+ 'vp9itxfm_16bpp.asm',
+ 'vp9lpf.asm',
+ 'vp9lpf_16bpp.asm',
+ 'vp9mc.asm',
+ 'vp9mc_16bpp.asm',
+]
+
+FINAL_LIBRARY = 'mozavcodec'
+
+include('/media/ffvpx/ffvpxcommon.mozbuild')
diff --git a/media/ffvpx/libavcodec/x86/videodsp.asm b/media/ffvpx/libavcodec/x86/videodsp.asm
new file mode 100644
index 000000000..a807d3b88
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/videodsp.asm
@@ -0,0 +1,468 @@
+;******************************************************************************
+;* Core video DSP functions
+;* Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION .text
+
+; slow vertical extension loop function. Works with variable-width, and
+; does per-line reading/writing of source data
+
+%macro V_COPY_ROW 2 ; type (top/body/bottom), h
+.%1_y_loop: ; do {
+ mov wq, r7mp ; initialize w (r7mp = wmp)
+.%1_x_loop: ; do {
+ movu m0, [srcq+wq] ; m0 = read($mmsize)
+ movu [dstq+wq], m0 ; write(m0, $mmsize)
+ add wq, mmsize ; w -= $mmsize
+ cmp wq, -mmsize ; } while (w > $mmsize);
+ jl .%1_x_loop
+ movu m0, [srcq-mmsize] ; m0 = read($mmsize)
+ movu [dstq-mmsize], m0 ; write(m0, $mmsize)
+%ifidn %1, body ; if ($type == body) {
+ add srcq, src_strideq ; src += src_stride
+%endif ; }
+ add dstq, dst_strideq ; dst += dst_stride
+ dec %2 ; } while (--$h);
+ jnz .%1_y_loop
+%endmacro
+
+%macro vvar_fn 0
+; .----. <- zero
+; | | <- top is copied from first line in body of source
+; |----| <- start_y
+; | | <- body is copied verbatim (line-by-line) from source
+; |----| <- end_y
+; | | <- bottom is copied from last line in body of source
+; '----' <- bh
+%if ARCH_X86_64
+cglobal emu_edge_vvar, 7, 8, 1, dst, dst_stride, src, src_stride, \
+ start_y, end_y, bh, w
+%else ; x86-32
+cglobal emu_edge_vvar, 1, 6, 1, dst, src, start_y, end_y, bh, w
+%define src_strideq r3mp
+%define dst_strideq r1mp
+ mov srcq, r2mp
+ mov start_yq, r4mp
+ mov end_yq, r5mp
+ mov bhq, r6mp
+%endif
+ sub bhq, end_yq ; bh -= end_y
+ sub end_yq, start_yq ; end_y -= start_y
+ add srcq, r7mp ; (r7mp = wmp)
+ add dstq, r7mp ; (r7mp = wmp)
+ neg r7mp ; (r7mp = wmp)
+ test start_yq, start_yq ; if (start_q) {
+ jz .body
+ V_COPY_ROW top, start_yq ; v_copy_row(top, start_yq)
+.body: ; }
+ V_COPY_ROW body, end_yq ; v_copy_row(body, end_yq)
+ test bhq, bhq ; if (bh) {
+ jz .end
+ sub srcq, src_strideq ; src -= src_stride
+ V_COPY_ROW bottom, bhq ; v_copy_row(bottom, bh)
+.end: ; }
+ RET
+%endmacro
+
+%if ARCH_X86_32
+INIT_MMX mmx
+vvar_fn
+%endif
+
+INIT_XMM sse
+vvar_fn
+
+%macro hvar_fn 0
+cglobal emu_edge_hvar, 5, 6, 1, dst, dst_stride, start_x, n_words, h, w
+ lea dstq, [dstq+n_wordsq*2]
+ neg n_wordsq
+ lea start_xq, [start_xq+n_wordsq*2]
+.y_loop: ; do {
+%if cpuflag(avx2)
+ vpbroadcastb m0, [dstq+start_xq]
+ mov wq, n_wordsq ; initialize w
+%else
+ movzx wd, byte [dstq+start_xq] ; w = read(1)
+ imul wd, 0x01010101 ; w *= 0x01010101
+ movd m0, wd
+ mov wq, n_wordsq ; initialize w
+%if cpuflag(sse2)
+ pshufd m0, m0, q0000 ; splat
+%else ; mmx
+ punpckldq m0, m0 ; splat
+%endif ; mmx/sse
+%endif ; avx2
+.x_loop: ; do {
+ movu [dstq+wq*2], m0 ; write($reg, $mmsize)
+ add wq, mmsize/2 ; w -= $mmsize/2
+ cmp wq, -mmsize/2 ; } while (w > $mmsize/2)
+ jl .x_loop
+ movu [dstq-mmsize], m0 ; write($reg, $mmsize)
+ add dstq, dst_strideq ; dst += dst_stride
+ dec hq ; } while (h--)
+ jnz .y_loop
+ RET
+%endmacro
+
+%if ARCH_X86_32
+INIT_MMX mmx
+hvar_fn
+%endif
+
+INIT_XMM sse2
+hvar_fn
+
+%if HAVE_AVX2_EXTERNAL
+INIT_XMM avx2
+hvar_fn
+%endif
+
+; macro to read/write a horizontal number of pixels (%2) to/from registers
+; on sse, - fills xmm0-15 for consecutive sets of 16 pixels
+; - if (%2 & 8) fills 8 bytes into xmm$next
+; - if (%2 & 4) fills 4 bytes into xmm$next
+; - if (%2 & 3) fills 1, 2 or 4 bytes in eax
+; on mmx, - fills mm0-7 for consecutive sets of 8 pixels
+; - if (%2 & 4) fills 4 bytes into mm$next
+; - if (%2 & 3) fills 1, 2 or 4 bytes in eax
+; writing data out is in the same way
+%macro READ_NUM_BYTES 2
+%assign %%off 0 ; offset in source buffer
+%assign %%mmx_idx 0 ; mmx register index
+%assign %%xmm_idx 0 ; xmm register index
+
+%rep %2/mmsize
+%if mmsize == 16
+ movu xmm %+ %%xmm_idx, [srcq+%%off]
+%assign %%xmm_idx %%xmm_idx+1
+%else ; mmx
+ movu mm %+ %%mmx_idx, [srcq+%%off]
+%assign %%mmx_idx %%mmx_idx+1
+%endif
+%assign %%off %%off+mmsize
+%endrep ; %2/mmsize
+
+%if mmsize == 16
+%if (%2-%%off) >= 8
+%if %2 > 16 && (%2-%%off) > 8
+ movu xmm %+ %%xmm_idx, [srcq+%2-16]
+%assign %%xmm_idx %%xmm_idx+1
+%assign %%off %2
+%else
+ movq mm %+ %%mmx_idx, [srcq+%%off]
+%assign %%mmx_idx %%mmx_idx+1
+%assign %%off %%off+8
+%endif
+%endif ; (%2-%%off) >= 8
+%endif
+
+%if (%2-%%off) >= 4
+%if %2 > 8 && (%2-%%off) > 4
+ movq mm %+ %%mmx_idx, [srcq+%2-8]
+%assign %%off %2
+%else
+ movd mm %+ %%mmx_idx, [srcq+%%off]
+%assign %%off %%off+4
+%endif
+%assign %%mmx_idx %%mmx_idx+1
+%endif ; (%2-%%off) >= 4
+
+%if (%2-%%off) >= 1
+%if %2 >= 4
+ movd mm %+ %%mmx_idx, [srcq+%2-4]
+%elif (%2-%%off) == 1
+ mov valb, [srcq+%2-1]
+%elif (%2-%%off) == 2
+ mov valw, [srcq+%2-2]
+%else
+ mov valb, [srcq+%2-1]
+ ror vald, 16
+ mov valw, [srcq+%2-3]
+%endif
+%endif ; (%2-%%off) >= 1
+%endmacro ; READ_NUM_BYTES
+
+%macro WRITE_NUM_BYTES 2
+%assign %%off 0 ; offset in destination buffer
+%assign %%mmx_idx 0 ; mmx register index
+%assign %%xmm_idx 0 ; xmm register index
+
+%rep %2/mmsize
+%if mmsize == 16
+ movu [dstq+%%off], xmm %+ %%xmm_idx
+%assign %%xmm_idx %%xmm_idx+1
+%else ; mmx
+ movu [dstq+%%off], mm %+ %%mmx_idx
+%assign %%mmx_idx %%mmx_idx+1
+%endif
+%assign %%off %%off+mmsize
+%endrep ; %2/mmsize
+
+%if mmsize == 16
+%if (%2-%%off) >= 8
+%if %2 > 16 && (%2-%%off) > 8
+ movu [dstq+%2-16], xmm %+ %%xmm_idx
+%assign %%xmm_idx %%xmm_idx+1
+%assign %%off %2
+%else
+ movq [dstq+%%off], mm %+ %%mmx_idx
+%assign %%mmx_idx %%mmx_idx+1
+%assign %%off %%off+8
+%endif
+%endif ; (%2-%%off) >= 8
+%endif
+
+%if (%2-%%off) >= 4
+%if %2 > 8 && (%2-%%off) > 4
+ movq [dstq+%2-8], mm %+ %%mmx_idx
+%assign %%off %2
+%else
+ movd [dstq+%%off], mm %+ %%mmx_idx
+%assign %%off %%off+4
+%endif
+%assign %%mmx_idx %%mmx_idx+1
+%endif ; (%2-%%off) >= 4
+
+%if (%2-%%off) >= 1
+%if %2 >= 4
+ movd [dstq+%2-4], mm %+ %%mmx_idx
+%elif (%2-%%off) == 1
+ mov [dstq+%2-1], valb
+%elif (%2-%%off) == 2
+ mov [dstq+%2-2], valw
+%else
+ mov [dstq+%2-3], valw
+ ror vald, 16
+ mov [dstq+%2-1], valb
+%ifnidn %1, body
+ ror vald, 16
+%endif
+%endif
+%endif ; (%2-%%off) >= 1
+%endmacro ; WRITE_NUM_BYTES
+
+; vertical top/bottom extend and body copy fast loops
+; these are function pointers to set-width line copy functions, i.e.
+; they read a fixed number of pixels into set registers, and write
+; those out into the destination buffer
+%macro VERTICAL_EXTEND 2
+%assign %%n %1
+%rep 1+%2-%1
+%if %%n <= 3
+%if ARCH_X86_64
+cglobal emu_edge_vfix %+ %%n, 6, 8, 0, dst, dst_stride, src, src_stride, \
+ start_y, end_y, val, bh
+ mov bhq, r6mp ; r6mp = bhmp
+%else ; x86-32
+cglobal emu_edge_vfix %+ %%n, 0, 6, 0, val, dst, src, start_y, end_y, bh
+ mov dstq, r0mp
+ mov srcq, r2mp
+ mov start_yq, r4mp
+ mov end_yq, r5mp
+ mov bhq, r6mp
+%define dst_strideq r1mp
+%define src_strideq r3mp
+%endif ; x86-64/32
+%else
+%if ARCH_X86_64
+cglobal emu_edge_vfix %+ %%n, 7, 7, 1, dst, dst_stride, src, src_stride, \
+ start_y, end_y, bh
+%else ; x86-32
+cglobal emu_edge_vfix %+ %%n, 1, 5, 1, dst, src, start_y, end_y, bh
+ mov srcq, r2mp
+ mov start_yq, r4mp
+ mov end_yq, r5mp
+ mov bhq, r6mp
+%define dst_strideq r1mp
+%define src_strideq r3mp
+%endif ; x86-64/32
+%endif
+ ; FIXME move this to c wrapper?
+ sub bhq, end_yq ; bh -= end_y
+ sub end_yq, start_yq ; end_y -= start_y
+
+ ; extend pixels above body
+ test start_yq, start_yq ; if (start_y) {
+ jz .body_loop
+ READ_NUM_BYTES top, %%n ; $variable_regs = read($n)
+.top_loop: ; do {
+ WRITE_NUM_BYTES top, %%n ; write($variable_regs, $n)
+ add dstq, dst_strideq ; dst += linesize
+ dec start_yq ; } while (--start_y)
+ jnz .top_loop ; }
+
+ ; copy body pixels
+.body_loop: ; do {
+ READ_NUM_BYTES body, %%n ; $variable_regs = read($n)
+ WRITE_NUM_BYTES body, %%n ; write($variable_regs, $n)
+ add dstq, dst_strideq ; dst += dst_stride
+ add srcq, src_strideq ; src += src_stride
+ dec end_yq ; } while (--end_y)
+ jnz .body_loop
+
+ ; copy bottom pixels
+ test bhq, bhq ; if (block_h) {
+ jz .end
+ sub srcq, src_strideq ; src -= linesize
+ READ_NUM_BYTES bottom, %%n ; $variable_regs = read($n)
+.bottom_loop: ; do {
+ WRITE_NUM_BYTES bottom, %%n ; write($variable_regs, $n)
+ add dstq, dst_strideq ; dst += linesize
+ dec bhq ; } while (--bh)
+ jnz .bottom_loop ; }
+
+.end:
+ RET
+%assign %%n %%n+1
+%endrep ; 1+%2-%1
+%endmacro ; VERTICAL_EXTEND
+
+INIT_MMX mmx
+VERTICAL_EXTEND 1, 15
+%if ARCH_X86_32
+VERTICAL_EXTEND 16, 22
+%endif
+
+INIT_XMM sse
+VERTICAL_EXTEND 16, 22
+
+; left/right (horizontal) fast extend functions
+; these are essentially identical to the vertical extend ones above,
+; just left/right separated because number of pixels to extend is
+; obviously not the same on both sides.
+
+%macro READ_V_PIXEL 2
+%if cpuflag(avx2)
+ vpbroadcastb m0, %2
+%else
+ movzx vald, byte %2
+ imul vald, 0x01010101
+%if %1 >= 8
+ movd m0, vald
+%if mmsize == 16
+ pshufd m0, m0, q0000
+%else
+ punpckldq m0, m0
+%endif ; mmsize == 16
+%endif ; %1 >= 8
+%endif ; avx2
+%endmacro ; READ_V_PIXEL
+
+%macro WRITE_V_PIXEL 2
+%assign %%off 0
+
+%if %1 >= 8
+
+%rep %1/mmsize
+ movu [%2+%%off], m0
+%assign %%off %%off+mmsize
+%endrep ; %1/mmsize
+
+%if mmsize == 16
+%if %1-%%off >= 8
+%if %1 > 16 && %1-%%off > 8
+ movu [%2+%1-16], m0
+%assign %%off %1
+%else
+ movq [%2+%%off], m0
+%assign %%off %%off+8
+%endif
+%endif ; %1-%%off >= 8
+%endif ; mmsize == 16
+
+%if %1-%%off >= 4
+%if %1 > 8 && %1-%%off > 4
+ movq [%2+%1-8], m0
+%assign %%off %1
+%else
+ movd [%2+%%off], m0
+%assign %%off %%off+4
+%endif
+%endif ; %1-%%off >= 4
+
+%else ; %1 < 8
+
+%rep %1/4
+ mov [%2+%%off], vald
+%assign %%off %%off+4
+%endrep ; %1/4
+
+%endif ; %1 >=/< 8
+
+%if %1-%%off == 2
+%if cpuflag(avx2)
+ movd [%2+%%off-2], m0
+%else
+ mov [%2+%%off], valw
+%endif ; avx2
+%endif ; %1-%%off == 2
+%endmacro ; WRITE_V_PIXEL
+
+%macro H_EXTEND 2
+%assign %%n %1
+%rep 1+(%2-%1)/2
+%if cpuflag(avx2)
+cglobal emu_edge_hfix %+ %%n, 4, 4, 1, dst, dst_stride, start_x, bh
+%else
+cglobal emu_edge_hfix %+ %%n, 4, 5, 1, dst, dst_stride, start_x, bh, val
+%endif
+.loop_y: ; do {
+ READ_V_PIXEL %%n, [dstq+start_xq] ; $variable_regs = read($n)
+ WRITE_V_PIXEL %%n, dstq ; write($variable_regs, $n)
+ add dstq, dst_strideq ; dst += dst_stride
+ dec bhq ; } while (--bh)
+ jnz .loop_y
+ RET
+%assign %%n %%n+2
+%endrep ; 1+(%2-%1)/2
+%endmacro ; H_EXTEND
+
+INIT_MMX mmx
+H_EXTEND 2, 14
+%if ARCH_X86_32
+H_EXTEND 16, 22
+%endif
+
+INIT_XMM sse2
+H_EXTEND 16, 22
+
+%if HAVE_AVX2_EXTERNAL
+INIT_XMM avx2
+H_EXTEND 8, 22
+%endif
+
+%macro PREFETCH_FN 1
+cglobal prefetch, 3, 3, 0, buf, stride, h
+.loop:
+ %1 [bufq]
+ add bufq, strideq
+ dec hd
+ jg .loop
+ REP_RET
+%endmacro
+
+INIT_MMX mmxext
+PREFETCH_FN prefetcht0
+%if ARCH_X86_32
+INIT_MMX 3dnow
+PREFETCH_FN prefetch
+%endif
diff --git a/media/ffvpx/libavcodec/x86/videodsp_init.c b/media/ffvpx/libavcodec/x86/videodsp_init.c
new file mode 100644
index 000000000..26e072bb1
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/videodsp_init.c
@@ -0,0 +1,309 @@
+/*
+ * Copyright (C) 2002-2012 Michael Niedermayer
+ * Copyright (C) 2012 Ronald S. Bultje
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
+#include "libavutil/common.h"
+#include "libavutil/cpu.h"
+#include "libavutil/mem.h"
+#include "libavutil/x86/asm.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/videodsp.h"
+
+#if HAVE_YASM
+typedef void emu_edge_vfix_func(uint8_t *dst, x86_reg dst_stride,
+ const uint8_t *src, x86_reg src_stride,
+ x86_reg start_y, x86_reg end_y, x86_reg bh);
+typedef void emu_edge_vvar_func(uint8_t *dst, x86_reg dst_stride,
+ const uint8_t *src, x86_reg src_stride,
+ x86_reg start_y, x86_reg end_y, x86_reg bh,
+ x86_reg w);
+
+extern emu_edge_vfix_func ff_emu_edge_vfix1_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix2_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix3_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix4_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix5_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix6_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix7_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix8_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix9_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix10_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix11_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix12_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix13_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix14_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix15_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix16_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix17_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix18_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix19_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix20_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix21_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix22_mmx;
+#if ARCH_X86_32
+static emu_edge_vfix_func * const vfixtbl_mmx[22] = {
+ &ff_emu_edge_vfix1_mmx, &ff_emu_edge_vfix2_mmx, &ff_emu_edge_vfix3_mmx,
+ &ff_emu_edge_vfix4_mmx, &ff_emu_edge_vfix5_mmx, &ff_emu_edge_vfix6_mmx,
+ &ff_emu_edge_vfix7_mmx, &ff_emu_edge_vfix8_mmx, &ff_emu_edge_vfix9_mmx,
+ &ff_emu_edge_vfix10_mmx, &ff_emu_edge_vfix11_mmx, &ff_emu_edge_vfix12_mmx,
+ &ff_emu_edge_vfix13_mmx, &ff_emu_edge_vfix14_mmx, &ff_emu_edge_vfix15_mmx,
+ &ff_emu_edge_vfix16_mmx, &ff_emu_edge_vfix17_mmx, &ff_emu_edge_vfix18_mmx,
+ &ff_emu_edge_vfix19_mmx, &ff_emu_edge_vfix20_mmx, &ff_emu_edge_vfix21_mmx,
+ &ff_emu_edge_vfix22_mmx
+};
+#endif
+extern emu_edge_vvar_func ff_emu_edge_vvar_mmx;
+extern emu_edge_vfix_func ff_emu_edge_vfix16_sse;
+extern emu_edge_vfix_func ff_emu_edge_vfix17_sse;
+extern emu_edge_vfix_func ff_emu_edge_vfix18_sse;
+extern emu_edge_vfix_func ff_emu_edge_vfix19_sse;
+extern emu_edge_vfix_func ff_emu_edge_vfix20_sse;
+extern emu_edge_vfix_func ff_emu_edge_vfix21_sse;
+extern emu_edge_vfix_func ff_emu_edge_vfix22_sse;
+static emu_edge_vfix_func * const vfixtbl_sse[22] = {
+ ff_emu_edge_vfix1_mmx, ff_emu_edge_vfix2_mmx, ff_emu_edge_vfix3_mmx,
+ ff_emu_edge_vfix4_mmx, ff_emu_edge_vfix5_mmx, ff_emu_edge_vfix6_mmx,
+ ff_emu_edge_vfix7_mmx, ff_emu_edge_vfix8_mmx, ff_emu_edge_vfix9_mmx,
+ ff_emu_edge_vfix10_mmx, ff_emu_edge_vfix11_mmx, ff_emu_edge_vfix12_mmx,
+ ff_emu_edge_vfix13_mmx, ff_emu_edge_vfix14_mmx, ff_emu_edge_vfix15_mmx,
+ ff_emu_edge_vfix16_sse, ff_emu_edge_vfix17_sse, ff_emu_edge_vfix18_sse,
+ ff_emu_edge_vfix19_sse, ff_emu_edge_vfix20_sse, ff_emu_edge_vfix21_sse,
+ ff_emu_edge_vfix22_sse
+};
+extern emu_edge_vvar_func ff_emu_edge_vvar_sse;
+
+typedef void emu_edge_hfix_func(uint8_t *dst, x86_reg dst_stride,
+ x86_reg start_x, x86_reg bh);
+typedef void emu_edge_hvar_func(uint8_t *dst, x86_reg dst_stride,
+ x86_reg start_x, x86_reg n_words, x86_reg bh);
+
+extern emu_edge_hfix_func ff_emu_edge_hfix2_mmx;
+extern emu_edge_hfix_func ff_emu_edge_hfix4_mmx;
+extern emu_edge_hfix_func ff_emu_edge_hfix6_mmx;
+extern emu_edge_hfix_func ff_emu_edge_hfix8_mmx;
+extern emu_edge_hfix_func ff_emu_edge_hfix10_mmx;
+extern emu_edge_hfix_func ff_emu_edge_hfix12_mmx;
+extern emu_edge_hfix_func ff_emu_edge_hfix14_mmx;
+extern emu_edge_hfix_func ff_emu_edge_hfix16_mmx;
+extern emu_edge_hfix_func ff_emu_edge_hfix18_mmx;
+extern emu_edge_hfix_func ff_emu_edge_hfix20_mmx;
+extern emu_edge_hfix_func ff_emu_edge_hfix22_mmx;
+#if ARCH_X86_32
+static emu_edge_hfix_func * const hfixtbl_mmx[11] = {
+ ff_emu_edge_hfix2_mmx, ff_emu_edge_hfix4_mmx, ff_emu_edge_hfix6_mmx,
+ ff_emu_edge_hfix8_mmx, ff_emu_edge_hfix10_mmx, ff_emu_edge_hfix12_mmx,
+ ff_emu_edge_hfix14_mmx, ff_emu_edge_hfix16_mmx, ff_emu_edge_hfix18_mmx,
+ ff_emu_edge_hfix20_mmx, ff_emu_edge_hfix22_mmx
+};
+#endif
+extern emu_edge_hvar_func ff_emu_edge_hvar_mmx;
+extern emu_edge_hfix_func ff_emu_edge_hfix16_sse2;
+extern emu_edge_hfix_func ff_emu_edge_hfix18_sse2;
+extern emu_edge_hfix_func ff_emu_edge_hfix20_sse2;
+extern emu_edge_hfix_func ff_emu_edge_hfix22_sse2;
+static emu_edge_hfix_func * const hfixtbl_sse2[11] = {
+ ff_emu_edge_hfix2_mmx, ff_emu_edge_hfix4_mmx, ff_emu_edge_hfix6_mmx,
+ ff_emu_edge_hfix8_mmx, ff_emu_edge_hfix10_mmx, ff_emu_edge_hfix12_mmx,
+ ff_emu_edge_hfix14_mmx, ff_emu_edge_hfix16_sse2, ff_emu_edge_hfix18_sse2,
+ ff_emu_edge_hfix20_sse2, ff_emu_edge_hfix22_sse2
+};
+extern emu_edge_hvar_func ff_emu_edge_hvar_sse2;
+#if HAVE_AVX2_EXTERNAL
+extern emu_edge_hfix_func ff_emu_edge_hfix8_avx2;
+extern emu_edge_hfix_func ff_emu_edge_hfix10_avx2;
+extern emu_edge_hfix_func ff_emu_edge_hfix12_avx2;
+extern emu_edge_hfix_func ff_emu_edge_hfix14_avx2;
+extern emu_edge_hfix_func ff_emu_edge_hfix16_avx2;
+extern emu_edge_hfix_func ff_emu_edge_hfix18_avx2;
+extern emu_edge_hfix_func ff_emu_edge_hfix20_avx2;
+extern emu_edge_hfix_func ff_emu_edge_hfix22_avx2;
+static emu_edge_hfix_func * const hfixtbl_avx2[11] = {
+ ff_emu_edge_hfix2_mmx, ff_emu_edge_hfix4_mmx, ff_emu_edge_hfix6_mmx,
+ ff_emu_edge_hfix8_avx2, ff_emu_edge_hfix10_avx2, ff_emu_edge_hfix12_avx2,
+ ff_emu_edge_hfix14_avx2, ff_emu_edge_hfix16_avx2, ff_emu_edge_hfix18_avx2,
+ ff_emu_edge_hfix20_avx2, ff_emu_edge_hfix22_avx2
+};
+extern emu_edge_hvar_func ff_emu_edge_hvar_avx2;
+#endif
+
+static av_always_inline void emulated_edge_mc(uint8_t *dst, const uint8_t *src,
+ ptrdiff_t dst_stride,
+ ptrdiff_t src_stride,
+ x86_reg block_w, x86_reg block_h,
+ x86_reg src_x, x86_reg src_y,
+ x86_reg w, x86_reg h,
+ emu_edge_vfix_func * const *vfix_tbl,
+ emu_edge_vvar_func *v_extend_var,
+ emu_edge_hfix_func * const *hfix_tbl,
+ emu_edge_hvar_func *h_extend_var)
+{
+ x86_reg start_y, start_x, end_y, end_x, src_y_add = 0, p;
+
+ if (!w || !h)
+ return;
+
+ av_assert2(block_w <= FFABS(dst_stride));
+
+ if (src_y >= h) {
+ src -= src_y*src_stride;
+ src_y_add = h - 1;
+ src_y = h - 1;
+ } else if (src_y <= -block_h) {
+ src -= src_y*src_stride;
+ src_y_add = 1 - block_h;
+ src_y = 1 - block_h;
+ }
+ if (src_x >= w) {
+ src += w - 1 - src_x;
+ src_x = w - 1;
+ } else if (src_x <= -block_w) {
+ src += 1 - block_w - src_x;
+ src_x = 1 - block_w;
+ }
+
+ start_y = FFMAX(0, -src_y);
+ start_x = FFMAX(0, -src_x);
+ end_y = FFMIN(block_h, h-src_y);
+ end_x = FFMIN(block_w, w-src_x);
+ av_assert2(start_x < end_x && block_w > 0);
+ av_assert2(start_y < end_y && block_h > 0);
+
+ // fill in the to-be-copied part plus all above/below
+ src += (src_y_add + start_y) * src_stride + start_x;
+ w = end_x - start_x;
+ if (w <= 22) {
+ vfix_tbl[w - 1](dst + start_x, dst_stride, src, src_stride,
+ start_y, end_y, block_h);
+ } else {
+ v_extend_var(dst + start_x, dst_stride, src, src_stride,
+ start_y, end_y, block_h, w);
+ }
+
+ // fill left
+ if (start_x) {
+ if (start_x <= 22) {
+ hfix_tbl[(start_x - 1) >> 1](dst, dst_stride, start_x, block_h);
+ } else {
+ h_extend_var(dst, dst_stride,
+ start_x, (start_x + 1) >> 1, block_h);
+ }
+ }
+
+ // fill right
+ p = block_w - end_x;
+ if (p) {
+ if (p <= 22) {
+ hfix_tbl[(p - 1) >> 1](dst + end_x - (p & 1), dst_stride,
+ -!(p & 1), block_h);
+ } else {
+ h_extend_var(dst + end_x - (p & 1), dst_stride,
+ -!(p & 1), (p + 1) >> 1, block_h);
+ }
+ }
+}
+
+#if ARCH_X86_32
+static av_noinline void emulated_edge_mc_mmx(uint8_t *buf, const uint8_t *src,
+ ptrdiff_t buf_stride,
+ ptrdiff_t src_stride,
+ int block_w, int block_h,
+ int src_x, int src_y, int w, int h)
+{
+ emulated_edge_mc(buf, src, buf_stride, src_stride, block_w, block_h,
+ src_x, src_y, w, h, vfixtbl_mmx, &ff_emu_edge_vvar_mmx,
+ hfixtbl_mmx, &ff_emu_edge_hvar_mmx);
+}
+
+static av_noinline void emulated_edge_mc_sse(uint8_t *buf, const uint8_t *src,
+ ptrdiff_t buf_stride,
+ ptrdiff_t src_stride,
+ int block_w, int block_h,
+ int src_x, int src_y, int w, int h)
+{
+ emulated_edge_mc(buf, src, buf_stride, src_stride, block_w, block_h,
+ src_x, src_y, w, h, vfixtbl_sse, &ff_emu_edge_vvar_sse,
+ hfixtbl_mmx, &ff_emu_edge_hvar_mmx);
+}
+#endif
+
+static av_noinline void emulated_edge_mc_sse2(uint8_t *buf, const uint8_t *src,
+ ptrdiff_t buf_stride,
+ ptrdiff_t src_stride,
+ int block_w, int block_h,
+ int src_x, int src_y, int w,
+ int h)
+{
+ emulated_edge_mc(buf, src, buf_stride, src_stride, block_w, block_h,
+ src_x, src_y, w, h, vfixtbl_sse, &ff_emu_edge_vvar_sse,
+ hfixtbl_sse2, &ff_emu_edge_hvar_sse2);
+}
+
+#if HAVE_AVX2_EXTERNAL
+static av_noinline void emulated_edge_mc_avx2(uint8_t *buf, const uint8_t *src,
+ ptrdiff_t buf_stride,
+ ptrdiff_t src_stride,
+ int block_w, int block_h,
+ int src_x, int src_y, int w,
+ int h)
+{
+ emulated_edge_mc(buf, src, buf_stride, src_stride, block_w, block_h,
+ src_x, src_y, w, h, vfixtbl_sse, &ff_emu_edge_vvar_sse,
+ hfixtbl_avx2, &ff_emu_edge_hvar_avx2);
+}
+#endif /* HAVE_AVX2_EXTERNAL */
+#endif /* HAVE_YASM */
+
+void ff_prefetch_mmxext(uint8_t *buf, ptrdiff_t stride, int h);
+void ff_prefetch_3dnow(uint8_t *buf, ptrdiff_t stride, int h);
+
+av_cold void ff_videodsp_init_x86(VideoDSPContext *ctx, int bpc)
+{
+#if HAVE_YASM
+ int cpu_flags = av_get_cpu_flags();
+
+#if ARCH_X86_32
+ if (EXTERNAL_MMX(cpu_flags) && bpc <= 8) {
+ ctx->emulated_edge_mc = emulated_edge_mc_mmx;
+ }
+ if (EXTERNAL_AMD3DNOW(cpu_flags)) {
+ ctx->prefetch = ff_prefetch_3dnow;
+ }
+#endif /* ARCH_X86_32 */
+ if (EXTERNAL_MMXEXT(cpu_flags)) {
+ ctx->prefetch = ff_prefetch_mmxext;
+ }
+#if ARCH_X86_32
+ if (EXTERNAL_SSE(cpu_flags) && bpc <= 8) {
+ ctx->emulated_edge_mc = emulated_edge_mc_sse;
+ }
+#endif /* ARCH_X86_32 */
+ if (EXTERNAL_SSE2(cpu_flags) && bpc <= 8) {
+ ctx->emulated_edge_mc = emulated_edge_mc_sse2;
+ }
+#if HAVE_AVX2_EXTERNAL
+ if (EXTERNAL_AVX2(cpu_flags) && bpc <= 8) {
+ ctx->emulated_edge_mc = emulated_edge_mc_avx2;
+ }
+#endif
+#endif /* HAVE_YASM */
+}
diff --git a/media/ffvpx/libavcodec/x86/vp56_arith.h b/media/ffvpx/libavcodec/x86/vp56_arith.h
new file mode 100644
index 000000000..810cc8dcd
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp56_arith.h
@@ -0,0 +1,51 @@
+/**
+ * VP5 and VP6 compatible video decoder (arith decoder)
+ *
+ * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (C) 2010 Eli Friedman
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_X86_VP56_ARITH_H
+#define AVCODEC_X86_VP56_ARITH_H
+
+#if HAVE_INLINE_ASM && HAVE_FAST_CMOV && HAVE_6REGS
+#define vp56_rac_get_prob vp56_rac_get_prob
+static av_always_inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob)
+{
+ unsigned int code_word = vp56_rac_renorm(c);
+ unsigned int low = 1 + (((c->high - 1) * prob) >> 8);
+ unsigned int low_shift = low << 16;
+ int bit = 0;
+ c->code_word = code_word;
+
+ __asm__(
+ "subl %4, %1 \n\t"
+ "subl %3, %2 \n\t"
+ "setae %b0 \n\t"
+ "cmovb %4, %1 \n\t"
+ "cmovb %5, %2 \n\t"
+ : "+q"(bit), "+&r"(c->high), "+&r"(c->code_word)
+ : "r"(low_shift), "r"(low), "r"(code_word)
+ );
+
+ return bit;
+}
+#endif
+
+#endif /* AVCODEC_X86_VP56_ARITH_H */
diff --git a/media/ffvpx/libavcodec/x86/vp8dsp.asm b/media/ffvpx/libavcodec/x86/vp8dsp.asm
new file mode 100644
index 000000000..538b3f4a9
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp8dsp.asm
@@ -0,0 +1,1225 @@
+;******************************************************************************
+;* VP8 MMXEXT optimizations
+;* Copyright (c) 2010 Ronald S. Bultje <rsbultje@gmail.com>
+;* Copyright (c) 2010 Fiona Glaser <fiona@x264.com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+fourtap_filter_hw_m: times 4 dw -6, 123
+ times 4 dw 12, -1
+ times 4 dw -9, 93
+ times 4 dw 50, -6
+ times 4 dw -6, 50
+ times 4 dw 93, -9
+ times 4 dw -1, 12
+ times 4 dw 123, -6
+
+sixtap_filter_hw_m: times 4 dw 2, -11
+ times 4 dw 108, 36
+ times 4 dw -8, 1
+ times 4 dw 3, -16
+ times 4 dw 77, 77
+ times 4 dw -16, 3
+ times 4 dw 1, -8
+ times 4 dw 36, 108
+ times 4 dw -11, 2
+
+fourtap_filter_hb_m: times 8 db -6, 123
+ times 8 db 12, -1
+ times 8 db -9, 93
+ times 8 db 50, -6
+ times 8 db -6, 50
+ times 8 db 93, -9
+ times 8 db -1, 12
+ times 8 db 123, -6
+
+sixtap_filter_hb_m: times 8 db 2, 1
+ times 8 db -11, 108
+ times 8 db 36, -8
+ times 8 db 3, 3
+ times 8 db -16, 77
+ times 8 db 77, -16
+ times 8 db 1, 2
+ times 8 db -8, 36
+ times 8 db 108, -11
+
+fourtap_filter_v_m: times 8 dw -6
+ times 8 dw 123
+ times 8 dw 12
+ times 8 dw -1
+ times 8 dw -9
+ times 8 dw 93
+ times 8 dw 50
+ times 8 dw -6
+ times 8 dw -6
+ times 8 dw 50
+ times 8 dw 93
+ times 8 dw -9
+ times 8 dw -1
+ times 8 dw 12
+ times 8 dw 123
+ times 8 dw -6
+
+sixtap_filter_v_m: times 8 dw 2
+ times 8 dw -11
+ times 8 dw 108
+ times 8 dw 36
+ times 8 dw -8
+ times 8 dw 1
+ times 8 dw 3
+ times 8 dw -16
+ times 8 dw 77
+ times 8 dw 77
+ times 8 dw -16
+ times 8 dw 3
+ times 8 dw 1
+ times 8 dw -8
+ times 8 dw 36
+ times 8 dw 108
+ times 8 dw -11
+ times 8 dw 2
+
+bilinear_filter_vw_m: times 8 dw 1
+ times 8 dw 2
+ times 8 dw 3
+ times 8 dw 4
+ times 8 dw 5
+ times 8 dw 6
+ times 8 dw 7
+
+bilinear_filter_vb_m: times 8 db 7, 1
+ times 8 db 6, 2
+ times 8 db 5, 3
+ times 8 db 4, 4
+ times 8 db 3, 5
+ times 8 db 2, 6
+ times 8 db 1, 7
+
+%ifdef PIC
+%define fourtap_filter_hw picregq
+%define sixtap_filter_hw picregq
+%define fourtap_filter_hb picregq
+%define sixtap_filter_hb picregq
+%define fourtap_filter_v picregq
+%define sixtap_filter_v picregq
+%define bilinear_filter_vw picregq
+%define bilinear_filter_vb picregq
+%define npicregs 1
+%else
+%define fourtap_filter_hw fourtap_filter_hw_m
+%define sixtap_filter_hw sixtap_filter_hw_m
+%define fourtap_filter_hb fourtap_filter_hb_m
+%define sixtap_filter_hb sixtap_filter_hb_m
+%define fourtap_filter_v fourtap_filter_v_m
+%define sixtap_filter_v sixtap_filter_v_m
+%define bilinear_filter_vw bilinear_filter_vw_m
+%define bilinear_filter_vb bilinear_filter_vb_m
+%define npicregs 0
+%endif
+
+filter_h2_shuf: db 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
+filter_h4_shuf: db 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10
+
+filter_h6_shuf1: db 0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12
+filter_h6_shuf2: db 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9
+filter_h6_shuf3: db 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11
+
+pw_20091: times 4 dw 20091
+pw_17734: times 4 dw 17734
+
+cextern pw_3
+cextern pw_4
+cextern pw_64
+cextern pw_256
+
+SECTION .text
+
+;-------------------------------------------------------------------------------
+; subpel MC functions:
+;
+; void ff_put_vp8_epel<size>_h<htap>v<vtap>_<opt>(uint8_t *dst, int deststride,
+; uint8_t *src, int srcstride,
+; int height, int mx, int my);
+;-------------------------------------------------------------------------------
+
+%macro FILTER_SSSE3 1
+cglobal put_vp8_epel%1_h6, 6, 6 + npicregs, 8, dst, dststride, src, srcstride, height, mx, picreg
+ lea mxd, [mxq*3]
+ mova m3, [filter_h6_shuf2]
+ mova m4, [filter_h6_shuf3]
+%ifdef PIC
+ lea picregq, [sixtap_filter_hb_m]
+%endif
+ mova m5, [sixtap_filter_hb+mxq*8-48] ; set up 6tap filter in bytes
+ mova m6, [sixtap_filter_hb+mxq*8-32]
+ mova m7, [sixtap_filter_hb+mxq*8-16]
+
+.nextrow:
+ movu m0, [srcq-2]
+ mova m1, m0
+ mova m2, m0
+%if mmsize == 8
+; For epel4, we need 9 bytes, but only 8 get loaded; to compensate, do the
+; shuffle with a memory operand
+ punpcklbw m0, [srcq+3]
+%else
+ pshufb m0, [filter_h6_shuf1]
+%endif
+ pshufb m1, m3
+ pshufb m2, m4
+ pmaddubsw m0, m5
+ pmaddubsw m1, m6
+ pmaddubsw m2, m7
+ paddsw m0, m1
+ paddsw m0, m2
+ pmulhrsw m0, [pw_256]
+ packuswb m0, m0
+ movh [dstq], m0 ; store
+
+ ; go to next line
+ add dstq, dststrideq
+ add srcq, srcstrideq
+ dec heightd ; next row
+ jg .nextrow
+ REP_RET
+
+cglobal put_vp8_epel%1_h4, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, height, mx, picreg
+ shl mxd, 4
+ mova m2, [pw_256]
+ mova m3, [filter_h2_shuf]
+ mova m4, [filter_h4_shuf]
+%ifdef PIC
+ lea picregq, [fourtap_filter_hb_m]
+%endif
+ mova m5, [fourtap_filter_hb+mxq-16] ; set up 4tap filter in bytes
+ mova m6, [fourtap_filter_hb+mxq]
+
+.nextrow:
+ movu m0, [srcq-1]
+ mova m1, m0
+ pshufb m0, m3
+ pshufb m1, m4
+ pmaddubsw m0, m5
+ pmaddubsw m1, m6
+ paddsw m0, m1
+ pmulhrsw m0, m2
+ packuswb m0, m0
+ movh [dstq], m0 ; store
+
+ ; go to next line
+ add dstq, dststrideq
+ add srcq, srcstrideq
+ dec heightd ; next row
+ jg .nextrow
+ REP_RET
+
+cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my
+ shl myd, 4
+%ifdef PIC
+ lea picregq, [fourtap_filter_hb_m]
+%endif
+ mova m5, [fourtap_filter_hb+myq-16]
+ mova m6, [fourtap_filter_hb+myq]
+ mova m7, [pw_256]
+
+ ; read 3 lines
+ sub srcq, srcstrideq
+ movh m0, [srcq]
+ movh m1, [srcq+ srcstrideq]
+ movh m2, [srcq+2*srcstrideq]
+ add srcq, srcstrideq
+
+.nextrow:
+ movh m3, [srcq+2*srcstrideq] ; read new row
+ mova m4, m0
+ mova m0, m1
+ punpcklbw m4, m1
+ mova m1, m2
+ punpcklbw m2, m3
+ pmaddubsw m4, m5
+ pmaddubsw m2, m6
+ paddsw m4, m2
+ mova m2, m3
+ pmulhrsw m4, m7
+ packuswb m4, m4
+ movh [dstq], m4
+
+ ; go to next line
+ add dstq, dststrideq
+ add srcq, srcstrideq
+ dec heightd ; next row
+ jg .nextrow
+ REP_RET
+
+; 6-tap vertical subpel filter, SSSE3: 5-row pipeline in m0-m4, one new row
+; per iteration. my*3 then *8 indexes the 6-tap byte filter table (3 x 16B
+; per phase, addressed at myq-48/-32/-16). pmulhrsw/pw_256 = rounded >>7.
+cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my
+ lea myd, [myq*3]
+%ifdef PIC
+ lea picregq, [sixtap_filter_hb_m]
+%endif
+ lea myq, [sixtap_filter_hb+myq*8]
+
+ ; read 5 lines
+ sub srcq, srcstrideq
+ sub srcq, srcstrideq
+ movh m0, [srcq]
+ movh m1, [srcq+srcstrideq]
+ movh m2, [srcq+srcstrideq*2]
+ lea srcq, [srcq+srcstrideq*2]
+ add srcq, srcstrideq
+ movh m3, [srcq]
+ movh m4, [srcq+srcstrideq]
+
+.nextrow:
+ movh m5, [srcq+2*srcstrideq] ; read new row
+ mova m6, m0
+ punpcklbw m6, m5
+ mova m0, m1 ; rotate the row pipeline down
+ punpcklbw m1, m2
+ mova m7, m3
+ punpcklbw m7, m4
+ pmaddubsw m6, [myq-48]
+ pmaddubsw m1, [myq-32]
+ pmaddubsw m7, [myq-16]
+ paddsw m6, m1
+ paddsw m6, m7
+ mova m1, m2
+ mova m2, m3
+ pmulhrsw m6, [pw_256] ; rounded >>7
+ mova m3, m4
+ packuswb m6, m6
+ mova m4, m5
+ movh [dstq], m6
+
+ ; go to next line
+ add dstq, dststrideq
+ add srcq, srcstrideq
+ dec heightd ; next row
+ jg .nextrow
+ REP_RET
+%endmacro
+
+; instantiate the SSSE3 filter macro: 4px-wide variants use MMX registers,
+; 8px-wide variants use XMM registers
+INIT_MMX ssse3
+FILTER_SSSE3 4
+INIT_XMM ssse3
+FILTER_SSSE3 8
+
+; 4x4 block, H-only 4-tap filter
+; mmxext path: no pshufb, so pixels are widened to words (punpcklbw with
+; zero) and pair layouts built with pshufw; pmaddwd applies word filters
+; two pixels at a time. Rounding is explicit: +pw_64 then psraw 7.
+INIT_MMX mmxext
+cglobal put_vp8_epel4_h4, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, height, mx, picreg
+ shl mxd, 4
+%ifdef PIC
+ lea picregq, [fourtap_filter_hw_m]
+%endif
+ movq mm4, [fourtap_filter_hw+mxq-16] ; set up 4tap filter in words
+ movq mm5, [fourtap_filter_hw+mxq]
+ movq mm7, [pw_64]
+ pxor mm6, mm6 ; zero register for byte->word widening
+
+.nextrow:
+ movq mm1, [srcq-1] ; (ABCDEFGH) load 8 horizontal pixels
+
+ ; first set of 2 pixels
+ movq mm2, mm1 ; byte ABCD..
+ punpcklbw mm1, mm6 ; byte->word ABCD
+ pshufw mm0, mm2, 9 ; byte CDEF..
+ punpcklbw mm0, mm6 ; byte->word CDEF
+ pshufw mm3, mm1, 0x94 ; word ABBC
+ pshufw mm1, mm0, 0x94 ; word CDDE
+ pmaddwd mm3, mm4 ; multiply 2px with F0/F1
+ movq mm0, mm1 ; backup for second set of pixels
+ pmaddwd mm1, mm5 ; multiply 2px with F2/F3
+ paddd mm3, mm1 ; finish 1st 2px
+
+ ; second set of 2 pixels, use backup of above
+ punpckhbw mm2, mm6 ; byte->word EFGH
+ pmaddwd mm0, mm4 ; multiply backed up 2px with F0/F1
+ pshufw mm1, mm2, 0x94 ; word EFFG
+ pmaddwd mm1, mm5 ; multiply 2px with F2/F3
+ paddd mm0, mm1 ; finish 2nd 2px
+
+ ; merge two sets of 2 pixels into one set of 4, round/clip/store
+ packssdw mm3, mm0 ; merge dword->word (4px)
+ paddsw mm3, mm7 ; rounding
+ psraw mm3, 7
+ packuswb mm3, mm6 ; clip and word->bytes
+ movd [dstq], mm3 ; store
+
+ ; go to next line
+ add dstq, dststrideq
+ add srcq, srcstrideq
+ dec heightd ; next row
+ jg .nextrow
+ REP_RET
+
+; 4x4 block, H-only 6-tap filter
+; Same 2px-at-a-time pmaddwd scheme as the 4-tap version, with a third
+; filter pair (F4/F5). The 9th input pixel is fetched with a 4-byte movd
+; at srcq+3 instead of widening the movq load, to avoid reading past the
+; 8 loaded bytes.
+INIT_MMX mmxext
+cglobal put_vp8_epel4_h6, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, height, mx, picreg
+ lea mxd, [mxq*3] ; 3 filter quadwords per phase
+%ifdef PIC
+ lea picregq, [sixtap_filter_hw_m]
+%endif
+ movq mm4, [sixtap_filter_hw+mxq*8-48] ; set up 4tap filter in words
+ movq mm5, [sixtap_filter_hw+mxq*8-32]
+ movq mm6, [sixtap_filter_hw+mxq*8-16]
+ movq mm7, [pw_64]
+ pxor mm3, mm3
+
+.nextrow:
+ movq mm1, [srcq-2] ; (ABCDEFGH) load 8 horizontal pixels
+
+ ; first set of 2 pixels
+ movq mm2, mm1 ; byte ABCD..
+ punpcklbw mm1, mm3 ; byte->word ABCD
+ pshufw mm0, mm2, 0x9 ; byte CDEF..
+ punpckhbw mm2, mm3 ; byte->word EFGH
+ punpcklbw mm0, mm3 ; byte->word CDEF
+ pshufw mm1, mm1, 0x94 ; word ABBC
+ pshufw mm2, mm2, 0x94 ; word EFFG
+ pmaddwd mm1, mm4 ; multiply 2px with F0/F1
+ pshufw mm3, mm0, 0x94 ; word CDDE
+ movq mm0, mm3 ; backup for second set of pixels
+ pmaddwd mm3, mm5 ; multiply 2px with F2/F3
+ paddd mm1, mm3 ; add to 1st 2px cache
+ movq mm3, mm2 ; backup for second set of pixels
+ pmaddwd mm2, mm6 ; multiply 2px with F4/F5
+ paddd mm1, mm2 ; finish 1st 2px
+
+ ; second set of 2 pixels, use backup of above
+ movd mm2, [srcq+3] ; byte FGHI (prevent overreads)
+ pmaddwd mm0, mm4 ; multiply 1st backed up 2px with F0/F1
+ pmaddwd mm3, mm5 ; multiply 2nd backed up 2px with F2/F3
+ paddd mm0, mm3 ; add to 2nd 2px cache
+ pxor mm3, mm3 ; re-zero mm3 (was clobbered above)
+ punpcklbw mm2, mm3 ; byte->word FGHI
+ pshufw mm2, mm2, 0xE9 ; word GHHI
+ pmaddwd mm2, mm6 ; multiply 2px with F4/F5
+ paddd mm0, mm2 ; finish 2nd 2px
+
+ ; merge two sets of 2 pixels into one set of 4, round/clip/store
+ packssdw mm1, mm0 ; merge dword->word (4px)
+ paddsw mm1, mm7 ; rounding
+ psraw mm1, 7
+ packuswb mm1, mm3 ; clip and word->bytes
+ movd [dstq], mm1 ; store
+
+ ; go to next line
+ add dstq, dststrideq
+ add srcq, srcstrideq
+ dec heightd ; next row
+ jg .nextrow
+ REP_RET
+
+; 8px-wide H 4-tap filter, SSE2: loads four shifted copies of the row,
+; widens each to words and multiplies with per-tap word filters (pmullw).
+; On x86-64 (%ifdef m8) all four filter rows stay in registers; on x86-32
+; the last two are re-read from memory each iteration.
+INIT_XMM sse2
+cglobal put_vp8_epel8_h4, 6, 6 + npicregs, 10, dst, dststride, src, srcstride, height, mx, picreg
+ shl mxd, 5 ; 4 x 16B filter rows per phase
+%ifdef PIC
+ lea picregq, [fourtap_filter_v_m]
+%endif
+ lea mxq, [fourtap_filter_v+mxq-32]
+ pxor m7, m7
+ mova m4, [pw_64]
+ mova m5, [mxq+ 0]
+ mova m6, [mxq+16]
+%ifdef m8
+ mova m8, [mxq+32]
+ mova m9, [mxq+48]
+%endif
+.nextrow:
+ movq m0, [srcq-1]
+ movq m1, [srcq-0]
+ movq m2, [srcq+1]
+ movq m3, [srcq+2]
+ punpcklbw m0, m7
+ punpcklbw m1, m7
+ punpcklbw m2, m7
+ punpcklbw m3, m7
+ pmullw m0, m5
+ pmullw m1, m6
+%ifdef m8
+ pmullw m2, m8
+ pmullw m3, m9
+%else
+ pmullw m2, [mxq+32]
+ pmullw m3, [mxq+48]
+%endif
+ paddsw m0, m1
+ paddsw m2, m3
+ paddsw m0, m2
+ paddsw m0, m4 ; +64 rounding bias
+ psraw m0, 7
+ packuswb m0, m7
+ movh [dstq], m0 ; store
+
+ ; go to next line
+ add dstq, dststrideq
+ add srcq, srcstrideq
+ dec heightd ; next row
+ jg .nextrow
+ REP_RET
+
+; 8px-wide H 6-tap filter, SSE2: six shifted copies of the row, each
+; multiplied by its word filter. On x86-64 the six filter rows are cached
+; in m8-m13; on x86-32 they are re-read from [mxq] every iteration.
+INIT_XMM sse2
+cglobal put_vp8_epel8_h6, 6, 6 + npicregs, 14, dst, dststride, src, srcstride, height, mx, picreg
+ lea mxd, [mxq*3]
+ shl mxd, 4 ; mx*48: 6 x 16B filter rows per phase
+%ifdef PIC
+ lea picregq, [sixtap_filter_v_m]
+%endif
+ lea mxq, [sixtap_filter_v+mxq-96]
+ pxor m7, m7
+ mova m6, [pw_64]
+%ifdef m8
+ mova m8, [mxq+ 0]
+ mova m9, [mxq+16]
+ mova m10, [mxq+32]
+ mova m11, [mxq+48]
+ mova m12, [mxq+64]
+ mova m13, [mxq+80]
+%endif
+.nextrow:
+ movq m0, [srcq-2]
+ movq m1, [srcq-1]
+ movq m2, [srcq-0]
+ movq m3, [srcq+1]
+ movq m4, [srcq+2]
+ movq m5, [srcq+3]
+ punpcklbw m0, m7
+ punpcklbw m1, m7
+ punpcklbw m2, m7
+ punpcklbw m3, m7
+ punpcklbw m4, m7
+ punpcklbw m5, m7
+%ifdef m8
+ pmullw m0, m8
+ pmullw m1, m9
+ pmullw m2, m10
+ pmullw m3, m11
+ pmullw m4, m12
+ pmullw m5, m13
+%else
+ pmullw m0, [mxq+ 0]
+ pmullw m1, [mxq+16]
+ pmullw m2, [mxq+32]
+ pmullw m3, [mxq+48]
+ pmullw m4, [mxq+64]
+ pmullw m5, [mxq+80]
+%endif
+ paddsw m1, m4 ; sum positive-coefficient taps first
+ paddsw m0, m5
+ paddsw m1, m2
+ paddsw m0, m3
+ paddsw m0, m1
+ paddsw m0, m6 ; +64 rounding bias
+ psraw m0, 7
+ packuswb m0, m7
+ movh [dstq], m0 ; store
+
+ ; go to next line
+ add dstq, dststrideq
+ add srcq, srcstrideq
+ dec heightd ; next row
+ jg .nextrow
+ REP_RET
+
+; Vertical 4/6-tap filters over word-widened rows (pmullw); instantiated
+; as mmxext (4px) and sse2 (8px). Rows are kept widened across iterations
+; and rotated down the m0.. pipeline; rounding is +pw_64 then psraw 7.
+%macro FILTER_V 1
+; 4x4 block, V-only 4-tap filter
+cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my
+ shl myd, 5
+%ifdef PIC
+ lea picregq, [fourtap_filter_v_m]
+%endif
+ lea myq, [fourtap_filter_v+myq-32]
+ mova m6, [pw_64]
+ pxor m7, m7
+ mova m5, [myq+48] ; last tap kept in a register
+
+ ; read 3 lines
+ sub srcq, srcstrideq
+ movh m0, [srcq]
+ movh m1, [srcq+ srcstrideq]
+ movh m2, [srcq+2*srcstrideq]
+ add srcq, srcstrideq
+ punpcklbw m0, m7
+ punpcklbw m1, m7
+ punpcklbw m2, m7
+
+.nextrow:
+ ; first calculate negative taps (to prevent losing positive overflows)
+ movh m4, [srcq+2*srcstrideq] ; read new row
+ punpcklbw m4, m7
+ mova m3, m4
+ pmullw m0, [myq+0]
+ pmullw m4, m5
+ paddsw m4, m0
+
+ ; then calculate positive taps
+ mova m0, m1
+ pmullw m1, [myq+16]
+ paddsw m4, m1
+ mova m1, m2
+ pmullw m2, [myq+32]
+ paddsw m4, m2
+ mova m2, m3
+
+ ; round/clip/store
+ paddsw m4, m6
+ psraw m4, 7
+ packuswb m4, m7
+ movh [dstq], m4
+
+ ; go to next line
+ add dstq, dststrideq
+ add srcq, srcstrideq
+ dec heightd ; next row
+ jg .nextrow
+ REP_RET
+
+
+; 4x4 block, V-only 6-tap filter
+cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my
+ shl myd, 4
+ lea myq, [myq*3] ; my*48: 6 x 16B filter rows per phase
+%ifdef PIC
+ lea picregq, [sixtap_filter_v_m]
+%endif
+ lea myq, [sixtap_filter_v+myq-96]
+ pxor m7, m7
+
+ ; read 5 lines
+ sub srcq, srcstrideq
+ sub srcq, srcstrideq
+ movh m0, [srcq]
+ movh m1, [srcq+srcstrideq]
+ movh m2, [srcq+srcstrideq*2]
+ lea srcq, [srcq+srcstrideq*2]
+ add srcq, srcstrideq
+ movh m3, [srcq]
+ movh m4, [srcq+srcstrideq]
+ punpcklbw m0, m7
+ punpcklbw m1, m7
+ punpcklbw m2, m7
+ punpcklbw m3, m7
+ punpcklbw m4, m7
+
+.nextrow:
+ ; first calculate negative taps (to prevent losing positive overflows)
+ mova m5, m1
+ pmullw m5, [myq+16]
+ mova m6, m4
+ pmullw m6, [myq+64]
+ paddsw m6, m5
+
+ ; then calculate positive taps
+ movh m5, [srcq+2*srcstrideq] ; read new row
+ punpcklbw m5, m7
+ pmullw m0, [myq+0]
+ paddsw m6, m0
+ mova m0, m1
+ mova m1, m2
+ pmullw m2, [myq+32]
+ paddsw m6, m2
+ mova m2, m3
+ pmullw m3, [myq+48]
+ paddsw m6, m3
+ mova m3, m4
+ mova m4, m5
+ pmullw m5, [myq+80]
+ paddsw m6, m5
+
+ ; round/clip/store
+ paddsw m6, [pw_64]
+ psraw m6, 7
+ packuswb m6, m7
+ movh [dstq], m6
+
+ ; go to next line
+ add dstq, dststrideq
+ add srcq, srcstrideq
+ dec heightd ; next row
+ jg .nextrow
+ REP_RET
+%endmacro
+
+INIT_MMX mmxext
+FILTER_V 4
+INIT_XMM sse2
+FILTER_V 8
+
+; Bilinear H/V filters with word coefficients (pmullw). Two output rows
+; per iteration. The negated index trick loads the complementary weight:
+; m5 = w[my], m4 = w[8-my]. Rounding is psraw 2 followed by pavgw against
+; zero, i.e. ((x>>2)+1)>>1, a rounded >>3 of the 16-bit products.
+%macro FILTER_BILINEAR 1
+cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, picreg, my
+ shl myd, 4
+%ifdef PIC
+ lea picregq, [bilinear_filter_vw_m]
+%endif
+ pxor m6, m6
+ mova m5, [bilinear_filter_vw+myq-1*16]
+ neg myq
+ mova m4, [bilinear_filter_vw+myq+7*16] ; complementary weight
+.nextrow:
+ movh m0, [srcq+srcstrideq*0]
+ movh m1, [srcq+srcstrideq*1]
+ movh m3, [srcq+srcstrideq*2]
+ punpcklbw m0, m6
+ punpcklbw m1, m6
+ punpcklbw m3, m6
+ mova m2, m1 ; row 1 is shared by both outputs
+ pmullw m0, m4
+ pmullw m1, m5
+ pmullw m2, m4
+ pmullw m3, m5
+ paddsw m0, m1
+ paddsw m2, m3
+ psraw m0, 2
+ psraw m2, 2
+ pavgw m0, m6 ; rounded final >>1
+ pavgw m2, m6
+%if mmsize == 8
+ packuswb m0, m0
+ packuswb m2, m2
+ movh [dstq+dststrideq*0], m0
+ movh [dstq+dststrideq*1], m2
+%else
+ packuswb m0, m2
+ movh [dstq+dststrideq*0], m0
+ movhps [dstq+dststrideq*1], m0
+%endif
+
+ lea dstq, [dstq+dststrideq*2]
+ lea srcq, [srcq+srcstrideq*2]
+ sub heightd, 2
+ jg .nextrow
+ REP_RET
+
+cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, height, mx, picreg
+ shl mxd, 4
+%ifdef PIC
+ lea picregq, [bilinear_filter_vw_m]
+%endif
+ pxor m6, m6
+ mova m5, [bilinear_filter_vw+mxq-1*16]
+ neg mxq
+ mova m4, [bilinear_filter_vw+mxq+7*16] ; complementary weight
+.nextrow:
+ movh m0, [srcq+srcstrideq*0+0]
+ movh m1, [srcq+srcstrideq*0+1]
+ movh m2, [srcq+srcstrideq*1+0]
+ movh m3, [srcq+srcstrideq*1+1]
+ punpcklbw m0, m6
+ punpcklbw m1, m6
+ punpcklbw m2, m6
+ punpcklbw m3, m6
+ pmullw m0, m4
+ pmullw m1, m5
+ pmullw m2, m4
+ pmullw m3, m5
+ paddsw m0, m1
+ paddsw m2, m3
+ psraw m0, 2
+ psraw m2, 2
+ pavgw m0, m6 ; rounded final >>1
+ pavgw m2, m6
+%if mmsize == 8
+ packuswb m0, m0
+ packuswb m2, m2
+ movh [dstq+dststrideq*0], m0
+ movh [dstq+dststrideq*1], m2
+%else
+ packuswb m0, m2
+ movh [dstq+dststrideq*0], m0
+ movhps [dstq+dststrideq*1], m0
+%endif
+
+ lea dstq, [dstq+dststrideq*2]
+ lea srcq, [srcq+srcstrideq*2]
+ sub heightd, 2
+ jg .nextrow
+ REP_RET
+%endmacro
+
+INIT_MMX mmxext
+FILTER_BILINEAR 4
+INIT_XMM sse2
+FILTER_BILINEAR 8
+
+; SSSE3 bilinear filters: both weights are packed as bytes in one register
+; so a single pmaddubsw per row pair replaces the two pmullw+paddsw of the
+; SSE2 version. Same psraw 2 + pavgw-with-zero rounding, 2 rows/iteration.
+%macro FILTER_BILINEAR_SSSE3 1
+cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, picreg, my
+ shl myd, 4
+%ifdef PIC
+ lea picregq, [bilinear_filter_vb_m]
+%endif
+ pxor m4, m4
+ mova m3, [bilinear_filter_vb+myq-16] ; interleaved byte weight pair
+.nextrow:
+ movh m0, [srcq+srcstrideq*0]
+ movh m1, [srcq+srcstrideq*1]
+ movh m2, [srcq+srcstrideq*2]
+ punpcklbw m0, m1 ; interleave row pairs for pmaddubsw
+ punpcklbw m1, m2
+ pmaddubsw m0, m3
+ pmaddubsw m1, m3
+ psraw m0, 2
+ psraw m1, 2
+ pavgw m0, m4 ; rounded final >>1
+ pavgw m1, m4
+%if mmsize==8
+ packuswb m0, m0
+ packuswb m1, m1
+ movh [dstq+dststrideq*0], m0
+ movh [dstq+dststrideq*1], m1
+%else
+ packuswb m0, m1
+ movh [dstq+dststrideq*0], m0
+ movhps [dstq+dststrideq*1], m0
+%endif
+
+ lea dstq, [dstq+dststrideq*2]
+ lea srcq, [srcq+srcstrideq*2]
+ sub heightd, 2
+ jg .nextrow
+ REP_RET
+
+cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg
+ shl mxd, 4
+%ifdef PIC
+ lea picregq, [bilinear_filter_vb_m]
+%endif
+ pxor m4, m4
+ mova m2, [filter_h2_shuf] ; pairs each pixel with its right neighbor
+ mova m3, [bilinear_filter_vb+mxq-16]
+.nextrow:
+ movu m0, [srcq+srcstrideq*0]
+ movu m1, [srcq+srcstrideq*1]
+ pshufb m0, m2
+ pshufb m1, m2
+ pmaddubsw m0, m3
+ pmaddubsw m1, m3
+ psraw m0, 2
+ psraw m1, 2
+ pavgw m0, m4 ; rounded final >>1
+ pavgw m1, m4
+%if mmsize==8
+ packuswb m0, m0
+ packuswb m1, m1
+ movh [dstq+dststrideq*0], m0
+ movh [dstq+dststrideq*1], m1
+%else
+ packuswb m0, m1
+ movh [dstq+dststrideq*0], m0
+ movhps [dstq+dststrideq*1], m0
+%endif
+
+ lea dstq, [dstq+dststrideq*2]
+ lea srcq, [srcq+srcstrideq*2]
+ sub heightd, 2
+ jg .nextrow
+ REP_RET
+%endmacro
+
+INIT_MMX ssse3
+FILTER_BILINEAR_SSSE3 4
+INIT_XMM ssse3
+FILTER_BILINEAR_SSSE3 8
+
+; plain 8-wide copy (fullpel motion compensation), two rows per iteration
+INIT_MMX mmx
+cglobal put_vp8_pixels8, 5, 5, 0, dst, dststride, src, srcstride, height
+.nextrow:
+ movq mm0, [srcq+srcstrideq*0]
+ movq mm1, [srcq+srcstrideq*1]
+ lea srcq, [srcq+srcstrideq*2]
+ movq [dstq+dststrideq*0], mm0
+ movq [dstq+dststrideq*1], mm1
+ lea dstq, [dstq+dststrideq*2]
+ sub heightd, 2
+ jg .nextrow
+ REP_RET
+
+; 16-wide copy as 2x2 movq halves; x86_32-only fallback (the SSE version
+; below is built unconditionally)
+%if ARCH_X86_32
+INIT_MMX mmx
+cglobal put_vp8_pixels16, 5, 5, 0, dst, dststride, src, srcstride, height
+.nextrow:
+ movq mm0, [srcq+srcstrideq*0+0]
+ movq mm1, [srcq+srcstrideq*0+8]
+ movq mm2, [srcq+srcstrideq*1+0]
+ movq mm3, [srcq+srcstrideq*1+8]
+ lea srcq, [srcq+srcstrideq*2]
+ movq [dstq+dststrideq*0+0], mm0
+ movq [dstq+dststrideq*0+8], mm1
+ movq [dstq+dststrideq*1+0], mm2
+ movq [dstq+dststrideq*1+8], mm3
+ lea dstq, [dstq+dststrideq*2]
+ sub heightd, 2
+ jg .nextrow
+ REP_RET
+%endif
+
+; 16-wide copy: unaligned loads (movups) from src, aligned stores (movaps)
+; to dst — dst is assumed 16-byte aligned
+INIT_XMM sse
+cglobal put_vp8_pixels16, 5, 5, 2, dst, dststride, src, srcstride, height
+.nextrow:
+ movups xmm0, [srcq+srcstrideq*0]
+ movups xmm1, [srcq+srcstrideq*1]
+ lea srcq, [srcq+srcstrideq*2]
+ movaps [dstq+dststrideq*0], xmm0
+ movaps [dstq+dststrideq*1], xmm1
+ lea dstq, [dstq+dststrideq*2]
+ sub heightd, 2
+ jg .nextrow
+ REP_RET
+
+;-----------------------------------------------------------------------------
+; void ff_vp8_idct_dc_add_<opt>(uint8_t *dst, int16_t block[16], int stride);
+;-----------------------------------------------------------------------------
+
+; Add a (possibly negative) DC to a 4-row block of pixels.
+; %1 = broadcast +DC bytes, %2 = broadcast -DC bytes (one of them is zero),
+; %3 = byte offset, %4 = load/store op (movh/mova). paddusb/psubusb pair
+; implements the signed add with unsigned saturation to [0,255].
+%macro ADD_DC 4
+ %4 m2, [dst1q+%3]
+ %4 m3, [dst1q+strideq+%3]
+ %4 m4, [dst2q+%3]
+ %4 m5, [dst2q+strideq+%3]
+ paddusb m2, %1
+ paddusb m3, %1
+ paddusb m4, %1
+ paddusb m5, %1
+ psubusb m2, %2
+ psubusb m3, %2
+ psubusb m4, %2
+ psubusb m5, %2
+ %4 [dst1q+%3], m2
+ %4 [dst1q+strideq+%3], m3
+ %4 [dst2q+%3], m4
+ %4 [dst2q+strideq+%3], m5
+%endmacro
+
+; DC-only IDCT + add: DC = (block[0]+4)>>3, broadcast to all 4x4 pixels.
+; block[0] is cleared in the same pass. m0/m1 hold +DC/-DC byte splats.
+INIT_MMX mmx
+cglobal vp8_idct_dc_add, 3, 3, 0, dst, block, stride
+ ; load data
+ movd m0, [blockq]
+
+ ; calculate DC
+ paddw m0, [pw_4]
+ pxor m1, m1
+ psraw m0, 3
+ movd [blockq], m1 ; clear the coefficient
+ psubw m1, m0 ; m1 = -DC
+ packuswb m0, m0
+ packuswb m1, m1
+ punpcklbw m0, m0 ; broadcast DC to 4 bytes
+ punpcklbw m1, m1
+ punpcklwd m0, m0
+ punpcklwd m1, m1
+
+ ; add DC
+ DEFINE_ARGS dst1, dst2, stride
+ lea dst2q, [dst1q+strideq*2]
+ ADD_DC m0, m1, 0, movh
+ RET
+
+; SSE4 DC-only IDCT + add: widens the four 4px rows to words, adds the
+; signed DC directly, and scatters rows back with pextrd (SSE4.1).
+INIT_XMM sse4
+cglobal vp8_idct_dc_add, 3, 3, 6, dst, block, stride
+ ; load data
+ movd m0, [blockq]
+ pxor m1, m1
+
+ ; calculate DC
+ paddw m0, [pw_4]
+ movd [blockq], m1 ; clear the coefficient
+ DEFINE_ARGS dst1, dst2, stride
+ lea dst2q, [dst1q+strideq*2]
+ movd m2, [dst1q]
+ movd m3, [dst1q+strideq]
+ movd m4, [dst2q]
+ movd m5, [dst2q+strideq]
+ psraw m0, 3 ; DC = (dc+4)>>3
+ pshuflw m0, m0, 0 ; broadcast DC across all words
+ punpcklqdq m0, m0
+ punpckldq m2, m3 ; rows 0/1 and 2/3 side by side
+ punpckldq m4, m5
+ punpcklbw m2, m1 ; byte->word
+ punpcklbw m4, m1
+ paddw m2, m0
+ paddw m4, m0
+ packuswb m2, m4 ; clip back to bytes
+ movd [dst1q], m2
+ pextrd [dst1q+strideq], m2, 1
+ pextrd [dst2q], m2, 2
+ pextrd [dst2q+strideq], m2, 3
+ RET
+
+;-----------------------------------------------------------------------------
+; void ff_vp8_idct_dc_add4y_<opt>(uint8_t *dst, int16_t block[4][16], int stride);
+;-----------------------------------------------------------------------------
+
+; DC-only IDCT for 4 horizontally adjacent luma blocks (x86_32 MMX path):
+; gathers the 4 DCs into one register, computes (dc+4)>>3 for all at once,
+; then expands to per-block +DC/-DC byte splats (m0/m6 = blocks A/B,
+; m1/m7 = blocks C/D) and adds over a 16px-wide, 4-row area.
+%if ARCH_X86_32
+INIT_MMX mmx
+cglobal vp8_idct_dc_add4y, 3, 3, 0, dst, block, stride
+ ; load data
+ movd m0, [blockq+32*0] ; A
+ movd m1, [blockq+32*2] ; C
+ punpcklwd m0, [blockq+32*1] ; A B
+ punpcklwd m1, [blockq+32*3] ; C D
+ punpckldq m0, m1 ; A B C D
+ pxor m6, m6
+
+ ; calculate DC
+ paddw m0, [pw_4]
+ movd [blockq+32*0], m6
+ movd [blockq+32*1], m6
+ movd [blockq+32*2], m6
+ movd [blockq+32*3], m6
+ psraw m0, 3
+ psubw m6, m0 ; m6 = -DCs
+ packuswb m0, m0
+ packuswb m6, m6
+ punpcklbw m0, m0 ; AABBCCDD
+ punpcklbw m6, m6 ; AABBCCDD
+ movq m1, m0
+ movq m7, m6
+ punpcklbw m0, m0 ; AAAABBBB
+ punpckhbw m1, m1 ; CCCCDDDD
+ punpcklbw m6, m6 ; AAAABBBB
+ punpckhbw m7, m7 ; CCCCDDDD
+
+ ; add DC
+ DEFINE_ARGS dst1, dst2, stride
+ lea dst2q, [dst1q+strideq*2]
+ ADD_DC m0, m6, 0, mova
+ ADD_DC m1, m7, 8, mova
+ RET
+%endif
+
+; SSE2 variant of dc_add4y: one XMM register holds all four per-block DC
+; byte splats (4 bytes each), so a single 16B-wide ADD_DC covers the row.
+INIT_XMM sse2
+cglobal vp8_idct_dc_add4y, 3, 3, 6, dst, block, stride
+ ; load data
+ movd m0, [blockq+32*0] ; A
+ movd m1, [blockq+32*2] ; C
+ punpcklwd m0, [blockq+32*1] ; A B
+ punpcklwd m1, [blockq+32*3] ; C D
+ punpckldq m0, m1 ; A B C D
+ pxor m1, m1
+
+ ; calculate DC
+ paddw m0, [pw_4]
+ movd [blockq+32*0], m1
+ movd [blockq+32*1], m1
+ movd [blockq+32*2], m1
+ movd [blockq+32*3], m1
+ psraw m0, 3
+ psubw m1, m0 ; m1 = -DCs
+ packuswb m0, m0
+ packuswb m1, m1
+ punpcklbw m0, m0 ; AABBCCDD
+ punpcklbw m1, m1
+ punpcklbw m0, m0 ; AAAABBBBCCCCDDDD
+ punpcklbw m1, m1
+
+ ; add DC
+ DEFINE_ARGS dst1, dst2, stride
+ lea dst2q, [dst1q+strideq*2]
+ ADD_DC m0, m1, 0, mova
+ RET
+
+;-----------------------------------------------------------------------------
+; void ff_vp8_idct_dc_add4uv_<opt>(uint8_t *dst, int16_t block[4][16], int stride);
+;-----------------------------------------------------------------------------
+
+; DC-only IDCT for the 4 chroma blocks arranged 2x2: same DC gather/splat
+; as dc_add4y, but blocks C/D are added 4 rows further down (strideq*4)
+; at offset 0 rather than at +8 in the same rows.
+INIT_MMX mmx
+cglobal vp8_idct_dc_add4uv, 3, 3, 0, dst, block, stride
+ ; load data
+ movd m0, [blockq+32*0] ; A
+ movd m1, [blockq+32*2] ; C
+ punpcklwd m0, [blockq+32*1] ; A B
+ punpcklwd m1, [blockq+32*3] ; C D
+ punpckldq m0, m1 ; A B C D
+ pxor m6, m6
+
+ ; calculate DC
+ paddw m0, [pw_4]
+ movd [blockq+32*0], m6
+ movd [blockq+32*1], m6
+ movd [blockq+32*2], m6
+ movd [blockq+32*3], m6
+ psraw m0, 3
+ psubw m6, m0 ; m6 = -DCs
+ packuswb m0, m0
+ packuswb m6, m6
+ punpcklbw m0, m0 ; AABBCCDD
+ punpcklbw m6, m6 ; AABBCCDD
+ movq m1, m0
+ movq m7, m6
+ punpcklbw m0, m0 ; AAAABBBB
+ punpckhbw m1, m1 ; CCCCDDDD
+ punpcklbw m6, m6 ; AAAABBBB
+ punpckhbw m7, m7 ; CCCCDDDD
+
+ ; add DC
+ DEFINE_ARGS dst1, dst2, stride
+ lea dst2q, [dst1q+strideq*2]
+ ADD_DC m0, m6, 0, mova
+ lea dst1q, [dst1q+strideq*4] ; lower pair of blocks
+ lea dst2q, [dst2q+strideq*4]
+ ADD_DC m1, m7, 0, mova
+ RET
+
+;-----------------------------------------------------------------------------
+; void ff_vp8_idct_add_<opt>(uint8_t *dst, int16_t block[16], int stride);
+;-----------------------------------------------------------------------------
+
+; calculate %1=mul_35468(%1)-mul_20091(%2); %2=mul_20091(%1)+mul_35468(%2)
+; this macro assumes that m6/m7 have words for 20091/17734 loaded
+%macro VP8_MULTIPLY_SUMSUB 4
+ mova %3, %1
+ mova %4, %2
+ pmulhw %3, m6 ;20091(1)
+ pmulhw %4, m6 ;20091(2)
+ paddw %3, %1
+ paddw %4, %2
+ paddw %1, %1
+ paddw %2, %2
+ pmulhw %1, m7 ;35468(1)
+ pmulhw %2, m7 ;35468(2)
+ psubw %1, %4
+ paddw %2, %3
+%endmacro
+
+; calculate x0=%1+%3; x1=%1-%3
+; x2=mul_35468(%2)-mul_20091(%4); x3=mul_20091(%2)+mul_35468(%4)
+; %1=x0+x3 (tmp0); %2=x1+x2 (tmp1); %3=x1-x2 (tmp2); %4=x0-x3 (tmp3)
+; %5/%6 are temporary registers
+; we assume m6/m7 have constant words 20091/17734 loaded in them
+; One 1-D pass of the VP8 4x4 inverse transform over registers m%1..m%4.
+; calculate x0=%1+%3; x1=%1-%3
+; x2=mul_35468(%2)-mul_20091(%4); x3=mul_20091(%2)+mul_35468(%4)
+; %1=x0+x3 (tmp0); %2=x1+x2 (tmp1); %3=x1-x2 (tmp2); %4=x0-x3 (tmp3)
+; %5/%6 are temporary registers
+; we assume m6/m7 have constant words 20091/17734 loaded in them
+%macro VP8_IDCT_TRANSFORM4x4_1D 6
+ SUMSUB_BA w, %3, %1, %5 ;t0, t1
+ VP8_MULTIPLY_SUMSUB m%2, m%4, m%5,m%6 ;t2, t3
+ SUMSUB_BA w, %4, %3, %5 ;tmp0, tmp3
+ SUMSUB_BA w, %2, %1, %5 ;tmp1, tmp2
+ SWAP %4, %1
+ SWAP %4, %3
+%endmacro
+
+; Full 4x4 IDCT + add: two 1-D passes with a transpose between them,
+; rounding (+4, and STORE_DIFFx2's >>3) folded into the second pass.
+; The coefficient block is cleared after loading; with cpuflag(sse) the
+; clear uses one xorps + two 16B movaps (MMX regs still do the IDCT).
+%macro VP8_IDCT_ADD 0
+cglobal vp8_idct_add, 3, 3, 0, dst, block, stride
+ ; load block data
+ movq m0, [blockq+ 0]
+ movq m1, [blockq+ 8]
+ movq m2, [blockq+16]
+ movq m3, [blockq+24]
+ movq m6, [pw_20091]
+ movq m7, [pw_17734]
+%if cpuflag(sse)
+ xorps xmm0, xmm0
+ movaps [blockq+ 0], xmm0
+ movaps [blockq+16], xmm0
+%else
+ pxor m4, m4
+ movq [blockq+ 0], m4
+ movq [blockq+ 8], m4
+ movq [blockq+16], m4
+ movq [blockq+24], m4
+%endif
+
+ ; actual IDCT
+ VP8_IDCT_TRANSFORM4x4_1D 0, 1, 2, 3, 4, 5
+ TRANSPOSE4x4W 0, 1, 2, 3, 4
+ paddw m0, [pw_4] ; rounding bias before the 2nd pass
+ VP8_IDCT_TRANSFORM4x4_1D 0, 1, 2, 3, 4, 5
+ TRANSPOSE4x4W 0, 1, 2, 3, 4
+
+ ; store
+ pxor m4, m4
+ DEFINE_ARGS dst1, dst2, stride
+ lea dst2q, [dst1q+2*strideq]
+ STORE_DIFFx2 m0, m1, m6, m7, m4, 3, dst1q, strideq
+ STORE_DIFFx2 m2, m3, m6, m7, m4, 3, dst2q, strideq
+
+ RET
+%endmacro
+
+%if ARCH_X86_32
+INIT_MMX mmx
+VP8_IDCT_ADD
+%endif
+INIT_MMX sse
+VP8_IDCT_ADD
+
+;-----------------------------------------------------------------------------
+; void ff_vp8_luma_dc_wht(int16_t block[4][4][16], int16_t dc[16])
+;-----------------------------------------------------------------------------
+
+; Scatter 8 transformed DC words (two MMX regs m%1/m%2) into the DC slot
+; of 8 of the 16 per-block coefficient arrays; each block is 16 words
+; (2*16 bytes) apart, %3 is the starting block column (0 or 2).
+%macro SCATTER_WHT 3
+ movd dc1d, m%1
+ movd dc2d, m%2
+ mov [blockq+2*16*(0+%3)], dc1w
+ mov [blockq+2*16*(1+%3)], dc2w
+ shr dc1d, 16 ; next word of the dword
+ shr dc2d, 16
+ psrlq m%1, 32 ; expose upper dword
+ psrlq m%2, 32
+ mov [blockq+2*16*(4+%3)], dc1w
+ mov [blockq+2*16*(5+%3)], dc2w
+ movd dc1d, m%1
+ movd dc2d, m%2
+ mov [blockq+2*16*(8+%3)], dc1w
+ mov [blockq+2*16*(9+%3)], dc2w
+ shr dc1d, 16
+ shr dc2d, 16
+ mov [blockq+2*16*(12+%3)], dc1w
+ mov [blockq+2*16*(13+%3)], dc2w
+%endmacro
+
+; One 1-D 4-point Hadamard (butterfly) pass over registers m%1..m%4,
+; built from two SUMSUB levels plus a SWAP to restore output order.
+%macro HADAMARD4_1D 4
+ SUMSUB_BADC w, %2, %1, %4, %3
+ SUMSUB_BADC w, %4, %2, %3, %1
+ SWAP %1, %4, %3
+%endmacro
+
+; Inverse Walsh-Hadamard transform of the 16 luma DCs: two Hadamard
+; passes around a transpose, +3 bias then >>3, results scattered into the
+; DC slot of each of the 16 coefficient blocks. The dc[] input is cleared
+; (xorps/movaps on the sse flavor, four movq on plain mmx).
+%macro VP8_DC_WHT 0
+cglobal vp8_luma_dc_wht, 2, 3, 0, block, dc1, dc2
+ movq m0, [dc1q]
+ movq m1, [dc1q+8]
+ movq m2, [dc1q+16]
+ movq m3, [dc1q+24]
+%if cpuflag(sse)
+ xorps xmm0, xmm0
+ movaps [dc1q+ 0], xmm0
+ movaps [dc1q+16], xmm0
+%else
+ pxor m4, m4
+ movq [dc1q+ 0], m4
+ movq [dc1q+ 8], m4
+ movq [dc1q+16], m4
+ movq [dc1q+24], m4
+%endif
+ HADAMARD4_1D 0, 1, 2, 3
+ TRANSPOSE4x4W 0, 1, 2, 3, 4
+ paddw m0, [pw_3] ; rounding bias before the 2nd pass
+ HADAMARD4_1D 0, 1, 2, 3
+ psraw m0, 3
+ psraw m1, 3
+ psraw m2, 3
+ psraw m3, 3
+ SCATTER_WHT 0, 1, 0
+ SCATTER_WHT 2, 3, 2
+ RET
+%endmacro
+
+%if ARCH_X86_32
+INIT_MMX mmx
+VP8_DC_WHT
+%endif
+INIT_MMX sse
+VP8_DC_WHT
diff --git a/media/ffvpx/libavcodec/x86/vp8dsp_init.c b/media/ffvpx/libavcodec/x86/vp8dsp_init.c
new file mode 100644
index 000000000..897d5a0e7
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp8dsp_init.c
@@ -0,0 +1,464 @@
+/*
+ * VP8 DSP functions x86-optimized
+ * Copyright (c) 2010 Ronald S. Bultje <rsbultje@gmail.com>
+ * Copyright (c) 2010 Fiona Glaser <fiona@x264.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/mem.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/vp8dsp.h"
+
+#if HAVE_YASM
+
+/*
+ * MC functions
+ */
+void ff_put_vp8_epel4_h4_mmxext(uint8_t *dst, ptrdiff_t dststride,
+ uint8_t *src, ptrdiff_t srcstride,
+ int height, int mx, int my);
+void ff_put_vp8_epel4_h6_mmxext(uint8_t *dst, ptrdiff_t dststride,
+ uint8_t *src, ptrdiff_t srcstride,
+ int height, int mx, int my);
+void ff_put_vp8_epel4_v4_mmxext(uint8_t *dst, ptrdiff_t dststride,
+ uint8_t *src, ptrdiff_t srcstride,
+ int height, int mx, int my);
+void ff_put_vp8_epel4_v6_mmxext(uint8_t *dst, ptrdiff_t dststride,
+ uint8_t *src, ptrdiff_t srcstride,
+ int height, int mx, int my);
+
+void ff_put_vp8_epel8_h4_sse2 (uint8_t *dst, ptrdiff_t dststride,
+ uint8_t *src, ptrdiff_t srcstride,
+ int height, int mx, int my);
+void ff_put_vp8_epel8_h6_sse2 (uint8_t *dst, ptrdiff_t dststride,
+ uint8_t *src, ptrdiff_t srcstride,
+ int height, int mx, int my);
+void ff_put_vp8_epel8_v4_sse2 (uint8_t *dst, ptrdiff_t dststride,
+ uint8_t *src, ptrdiff_t srcstride,
+ int height, int mx, int my);
+void ff_put_vp8_epel8_v6_sse2 (uint8_t *dst, ptrdiff_t dststride,
+ uint8_t *src, ptrdiff_t srcstride,
+ int height, int mx, int my);
+
+void ff_put_vp8_epel4_h4_ssse3 (uint8_t *dst, ptrdiff_t dststride,
+ uint8_t *src, ptrdiff_t srcstride,
+ int height, int mx, int my);
+void ff_put_vp8_epel4_h6_ssse3 (uint8_t *dst, ptrdiff_t dststride,
+ uint8_t *src, ptrdiff_t srcstride,
+ int height, int mx, int my);
+void ff_put_vp8_epel4_v4_ssse3 (uint8_t *dst, ptrdiff_t dststride,
+ uint8_t *src, ptrdiff_t srcstride,
+ int height, int mx, int my);
+void ff_put_vp8_epel4_v6_ssse3 (uint8_t *dst, ptrdiff_t dststride,
+ uint8_t *src, ptrdiff_t srcstride,
+ int height, int mx, int my);
+void ff_put_vp8_epel8_h4_ssse3 (uint8_t *dst, ptrdiff_t dststride,
+ uint8_t *src, ptrdiff_t srcstride,
+ int height, int mx, int my);
+void ff_put_vp8_epel8_h6_ssse3 (uint8_t *dst, ptrdiff_t dststride,
+ uint8_t *src, ptrdiff_t srcstride,
+ int height, int mx, int my);
+void ff_put_vp8_epel8_v4_ssse3 (uint8_t *dst, ptrdiff_t dststride,
+ uint8_t *src, ptrdiff_t srcstride,
+ int height, int mx, int my);
+void ff_put_vp8_epel8_v6_ssse3 (uint8_t *dst, ptrdiff_t dststride,
+ uint8_t *src, ptrdiff_t srcstride,
+ int height, int mx, int my);
+
+void ff_put_vp8_bilinear4_h_mmxext(uint8_t *dst, ptrdiff_t dststride,
+ uint8_t *src, ptrdiff_t srcstride,
+ int height, int mx, int my);
+void ff_put_vp8_bilinear8_h_sse2 (uint8_t *dst, ptrdiff_t dststride,
+ uint8_t *src, ptrdiff_t srcstride,
+ int height, int mx, int my);
+void ff_put_vp8_bilinear4_h_ssse3 (uint8_t *dst, ptrdiff_t dststride,
+ uint8_t *src, ptrdiff_t srcstride,
+ int height, int mx, int my);
+void ff_put_vp8_bilinear8_h_ssse3 (uint8_t *dst, ptrdiff_t dststride,
+ uint8_t *src, ptrdiff_t srcstride,
+ int height, int mx, int my);
+
+void ff_put_vp8_bilinear4_v_mmxext(uint8_t *dst, ptrdiff_t dststride,
+ uint8_t *src, ptrdiff_t srcstride,
+ int height, int mx, int my);
+void ff_put_vp8_bilinear8_v_sse2 (uint8_t *dst, ptrdiff_t dststride,
+ uint8_t *src, ptrdiff_t srcstride,
+ int height, int mx, int my);
+void ff_put_vp8_bilinear4_v_ssse3 (uint8_t *dst, ptrdiff_t dststride,
+ uint8_t *src, ptrdiff_t srcstride,
+ int height, int mx, int my);
+void ff_put_vp8_bilinear8_v_ssse3 (uint8_t *dst, ptrdiff_t dststride,
+ uint8_t *src, ptrdiff_t srcstride,
+ int height, int mx, int my);
+
+
+void ff_put_vp8_pixels8_mmx (uint8_t *dst, ptrdiff_t dststride,
+ uint8_t *src, ptrdiff_t srcstride,
+ int height, int mx, int my);
+void ff_put_vp8_pixels16_mmx(uint8_t *dst, ptrdiff_t dststride,
+ uint8_t *src, ptrdiff_t srcstride,
+ int height, int mx, int my);
+void ff_put_vp8_pixels16_sse(uint8_t *dst, ptrdiff_t dststride,
+ uint8_t *src, ptrdiff_t srcstride,
+ int height, int mx, int my);
+
+/* Build a 16px-wide (resp. 8px-wide) MC function from two side-by-side
+ * calls of the 8px (resp. 4px) assembly variant of the same filter.
+ * OPT = cpu suffix, FILTERTYPE = epel/bilinear, TAPTYPE = h4/h6/v4/... */
+#define TAP_W16(OPT, FILTERTYPE, TAPTYPE) \
+static void ff_put_vp8_ ## FILTERTYPE ## 16_ ## TAPTYPE ## _ ## OPT( \
+ uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \
+ ptrdiff_t srcstride, int height, int mx, int my) \
+{ \
+ ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \
+ dst, dststride, src, srcstride, height, mx, my); \
+ ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \
+ dst + 8, dststride, src + 8, srcstride, height, mx, my); \
+}
+#define TAP_W8(OPT, FILTERTYPE, TAPTYPE) \
+static void ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \
+ uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \
+ ptrdiff_t srcstride, int height, int mx, int my) \
+{ \
+ ff_put_vp8_ ## FILTERTYPE ## 4_ ## TAPTYPE ## _ ## OPT( \
+ dst, dststride, src, srcstride, height, mx, my); \
+ ff_put_vp8_ ## FILTERTYPE ## 4_ ## TAPTYPE ## _ ## OPT( \
+ dst + 4, dststride, src + 4, srcstride, height, mx, my); \
+}
+
+/* widths only needed when the wider asm variants are unavailable (x86_32) */
+#if ARCH_X86_32
+TAP_W8 (mmxext, epel, h4)
+TAP_W8 (mmxext, epel, h6)
+TAP_W16(mmxext, epel, h6)
+TAP_W8 (mmxext, epel, v4)
+TAP_W8 (mmxext, epel, v6)
+TAP_W16(mmxext, epel, v6)
+TAP_W8 (mmxext, bilinear, h)
+TAP_W16(mmxext, bilinear, h)
+TAP_W8 (mmxext, bilinear, v)
+TAP_W16(mmxext, bilinear, v)
+#endif
+
+TAP_W16(sse2, epel, h6)
+TAP_W16(sse2, epel, v6)
+TAP_W16(sse2, bilinear, h)
+TAP_W16(sse2, bilinear, v)
+
+TAP_W16(ssse3, epel, h6)
+TAP_W16(ssse3, epel, v6)
+TAP_W16(ssse3, bilinear, h)
+TAP_W16(ssse3, bilinear, v)
+
+/* Build a combined H+V (2-D) subpel function by running the horizontal
+ * filter into an aligned stack buffer (with TAPNUMY-1 extra rows for the
+ * vertical filter's top/bottom context) and the vertical filter out of it.
+ * MAXHEIGHT bounds the temporary buffer size. */
+#define HVTAP(OPT, ALIGN, TAPNUMX, TAPNUMY, SIZE, MAXHEIGHT) \
+static void ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## v ## TAPNUMY ## _ ## OPT( \
+ uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \
+ ptrdiff_t srcstride, int height, int mx, int my) \
+{ \
+ LOCAL_ALIGNED(ALIGN, uint8_t, tmp, [SIZE * (MAXHEIGHT + TAPNUMY - 1)]); \
+ uint8_t *tmpptr = tmp + SIZE * (TAPNUMY / 2 - 1); \
+ src -= srcstride * (TAPNUMY / 2 - 1); \
+ ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## _ ## OPT( \
+ tmp, SIZE, src, srcstride, height + TAPNUMY - 1, mx, my); \
+ ff_put_vp8_epel ## SIZE ## _v ## TAPNUMY ## _ ## OPT( \
+ dst, dststride, tmpptr, SIZE, height, mx, my); \
+}
+
+/* on x86_32 the mmxext path also has to cover the 8/16px widths */
+#if ARCH_X86_32
+#define HVTAPMMX(x, y) \
+HVTAP(mmxext, 8, x, y, 4, 8) \
+HVTAP(mmxext, 8, x, y, 8, 16)
+
+HVTAP(mmxext, 8, 6, 6, 16, 16)
+#else
+#define HVTAPMMX(x, y) \
+HVTAP(mmxext, 8, x, y, 4, 8)
+#endif
+
+HVTAPMMX(4, 4)
+HVTAPMMX(4, 6)
+HVTAPMMX(6, 4)
+HVTAPMMX(6, 6)
+
+#define HVTAPSSE2(x, y, w) \
+HVTAP(sse2, 16, x, y, w, 16) \
+HVTAP(ssse3, 16, x, y, w, 16)
+
+HVTAPSSE2(4, 4, 8)
+HVTAPSSE2(4, 6, 8)
+HVTAPSSE2(6, 4, 8)
+HVTAPSSE2(6, 6, 8)
+HVTAPSSE2(6, 6, 16)
+
+HVTAP(ssse3, 16, 4, 4, 4, 8)
+HVTAP(ssse3, 16, 4, 6, 4, 8)
+HVTAP(ssse3, 16, 6, 4, 4, 8)
+HVTAP(ssse3, 16, 6, 6, 4, 8)
+
+/* Combined H+V bilinear: H filter into a temp buffer with one extra row
+ * of bottom context (bilinear is a 2-tap filter), then V filter out. */
+#define HVBILIN(OPT, ALIGN, SIZE, MAXHEIGHT) \
+static void ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT( \
+ uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \
+ ptrdiff_t srcstride, int height, int mx, int my) \
+{ \
+ LOCAL_ALIGNED(ALIGN, uint8_t, tmp, [SIZE * (MAXHEIGHT + 2)]); \
+ ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT( \
+ tmp, SIZE, src, srcstride, height + 1, mx, my); \
+ ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT( \
+ dst, dststride, tmp, SIZE, height, mx, my); \
+}
+
+HVBILIN(mmxext, 8, 4, 8)
+#if ARCH_X86_32
+HVBILIN(mmxext, 8, 8, 16)
+HVBILIN(mmxext, 8, 16, 16)
+#endif
+HVBILIN(sse2, 8, 8, 16)
+HVBILIN(sse2, 8, 16, 16)
+HVBILIN(ssse3, 8, 4, 8)
+HVBILIN(ssse3, 8, 8, 16)
+HVBILIN(ssse3, 8, 16, 16)
+
+void ff_vp8_idct_dc_add_mmx(uint8_t *dst, int16_t block[16],
+ ptrdiff_t stride);
+void ff_vp8_idct_dc_add_sse4(uint8_t *dst, int16_t block[16],
+ ptrdiff_t stride);
+void ff_vp8_idct_dc_add4y_mmx(uint8_t *dst, int16_t block[4][16],
+ ptrdiff_t stride);
+void ff_vp8_idct_dc_add4y_sse2(uint8_t *dst, int16_t block[4][16],
+ ptrdiff_t stride);
+void ff_vp8_idct_dc_add4uv_mmx(uint8_t *dst, int16_t block[2][16],
+ ptrdiff_t stride);
+void ff_vp8_luma_dc_wht_mmx(int16_t block[4][4][16], int16_t dc[16]);
+void ff_vp8_luma_dc_wht_sse(int16_t block[4][4][16], int16_t dc[16]);
+void ff_vp8_idct_add_mmx(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
+void ff_vp8_idct_add_sse(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
+
+/* Prototype the full set of VP8 loop-filter asm entry points (simple,
+ * inner, mbedge; v/h; luma 16 / chroma 8uv) for one cpu suffix NAME. */
+#define DECLARE_LOOP_FILTER(NAME) \
+void ff_vp8_v_loop_filter_simple_ ## NAME(uint8_t *dst, \
+ ptrdiff_t stride, \
+ int flim); \
+void ff_vp8_h_loop_filter_simple_ ## NAME(uint8_t *dst, \
+ ptrdiff_t stride, \
+ int flim); \
+void ff_vp8_v_loop_filter16y_inner_ ## NAME (uint8_t *dst, \
+ ptrdiff_t stride, \
+ int e, int i, int hvt); \
+void ff_vp8_h_loop_filter16y_inner_ ## NAME (uint8_t *dst, \
+ ptrdiff_t stride, \
+ int e, int i, int hvt); \
+void ff_vp8_v_loop_filter8uv_inner_ ## NAME (uint8_t *dstU, \
+ uint8_t *dstV, \
+ ptrdiff_t s, \
+ int e, int i, int hvt); \
+void ff_vp8_h_loop_filter8uv_inner_ ## NAME (uint8_t *dstU, \
+ uint8_t *dstV, \
+ ptrdiff_t s, \
+ int e, int i, int hvt); \
+void ff_vp8_v_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, \
+ ptrdiff_t stride, \
+ int e, int i, int hvt); \
+void ff_vp8_h_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, \
+ ptrdiff_t stride, \
+ int e, int i, int hvt); \
+void ff_vp8_v_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, \
+ uint8_t *dstV, \
+ ptrdiff_t s, \
+ int e, int i, int hvt); \
+void ff_vp8_h_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, \
+ uint8_t *dstV, \
+ ptrdiff_t s, \
+ int e, int i, int hvt);
+
+DECLARE_LOOP_FILTER(mmx)
+DECLARE_LOOP_FILTER(mmxext)
+DECLARE_LOOP_FILTER(sse2)
+DECLARE_LOOP_FILTER(ssse3)
+DECLARE_LOOP_FILTER(sse4)
+
+#endif /* HAVE_YASM */
+
+/* Fill the epel (sixtap/fourtap) MC function tables.  Table indices are
+ * [IDX][v-taps][h-taps] where 0 = copy, 1 = 4-tap, 2 = 6-tap.  Luma (w=16)
+ * only ever uses copy or 6-tap, hence the separate LUMA macro. */
+#define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \
+ c->put_vp8_epel_pixels_tab[IDX][0][2] = ff_put_vp8_epel ## SIZE ## _h6_ ## OPT; \
+ c->put_vp8_epel_pixels_tab[IDX][2][0] = ff_put_vp8_epel ## SIZE ## _v6_ ## OPT; \
+ c->put_vp8_epel_pixels_tab[IDX][2][2] = ff_put_vp8_epel ## SIZE ## _h6v6_ ## OPT
+
+#define VP8_MC_FUNC(IDX, SIZE, OPT) \
+ c->put_vp8_epel_pixels_tab[IDX][0][1] = ff_put_vp8_epel ## SIZE ## _h4_ ## OPT; \
+ c->put_vp8_epel_pixels_tab[IDX][1][0] = ff_put_vp8_epel ## SIZE ## _v4_ ## OPT; \
+ c->put_vp8_epel_pixels_tab[IDX][1][1] = ff_put_vp8_epel ## SIZE ## _h4v4_ ## OPT; \
+ c->put_vp8_epel_pixels_tab[IDX][1][2] = ff_put_vp8_epel ## SIZE ## _h6v4_ ## OPT; \
+ c->put_vp8_epel_pixels_tab[IDX][2][1] = ff_put_vp8_epel ## SIZE ## _h4v6_ ## OPT; \
+ VP8_LUMA_MC_FUNC(IDX, SIZE, OPT)
+
+/* Bilinear MC: the asm only provides h, v and hv kernels, so 4-tap and
+ * 6-tap slots of the table alias the same function. */
+#define VP8_BILINEAR_MC_FUNC(IDX, SIZE, OPT) \
+ c->put_vp8_bilinear_pixels_tab[IDX][0][1] = ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT; \
+ c->put_vp8_bilinear_pixels_tab[IDX][0][2] = ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT; \
+ c->put_vp8_bilinear_pixels_tab[IDX][1][0] = ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT; \
+ c->put_vp8_bilinear_pixels_tab[IDX][1][1] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
+ c->put_vp8_bilinear_pixels_tab[IDX][1][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
+ c->put_vp8_bilinear_pixels_tab[IDX][2][0] = ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT; \
+ c->put_vp8_bilinear_pixels_tab[IDX][2][1] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
+ c->put_vp8_bilinear_pixels_tab[IDX][2][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT
+
+
+/**
+ * Install x86 SIMD motion-compensation functions shared by VP7 and VP8.
+ * Each successively stronger CPU-feature branch overwrites pointers set by
+ * weaker ones, so the fastest available implementation wins.  32-bit-only
+ * assignments are guarded with ARCH_X86_32 (the x86-64 builds get the SSE2+
+ * versions instead).  No-op when assembled sources are unavailable.
+ */
+av_cold void ff_vp78dsp_init_x86(VP8DSPContext *c)
+{
+#if HAVE_YASM
+ int cpu_flags = av_get_cpu_flags();
+
+ if (EXTERNAL_MMX(cpu_flags)) {
+#if ARCH_X86_32
+ c->put_vp8_epel_pixels_tab[0][0][0] =
+ c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_mmx;
+#endif
+ c->put_vp8_epel_pixels_tab[1][0][0] =
+ c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmx;
+ }
+
+ /* note that 4-tap width=16 functions are missing because w=16
+ * is only used for luma, and luma is always a copy or sixtap. */
+ if (EXTERNAL_MMXEXT(cpu_flags)) {
+ VP8_MC_FUNC(2, 4, mmxext);
+ VP8_BILINEAR_MC_FUNC(2, 4, mmxext);
+#if ARCH_X86_32
+ VP8_LUMA_MC_FUNC(0, 16, mmxext);
+ VP8_MC_FUNC(1, 8, mmxext);
+ VP8_BILINEAR_MC_FUNC(0, 16, mmxext);
+ VP8_BILINEAR_MC_FUNC(1, 8, mmxext);
+#endif
+ }
+
+ if (EXTERNAL_SSE(cpu_flags)) {
+ c->put_vp8_epel_pixels_tab[0][0][0] =
+ c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
+ }
+
+ /* SSE2SLOW is accepted here: even "slow" SSE2 beats the MMX fallback
+ * for these kernels. */
+ if (HAVE_SSE2_EXTERNAL && cpu_flags & (AV_CPU_FLAG_SSE2 | AV_CPU_FLAG_SSE2SLOW)) {
+ VP8_LUMA_MC_FUNC(0, 16, sse2);
+ VP8_MC_FUNC(1, 8, sse2);
+ VP8_BILINEAR_MC_FUNC(0, 16, sse2);
+ VP8_BILINEAR_MC_FUNC(1, 8, sse2);
+ }
+
+ if (EXTERNAL_SSSE3(cpu_flags)) {
+ VP8_LUMA_MC_FUNC(0, 16, ssse3);
+ VP8_MC_FUNC(1, 8, ssse3);
+ VP8_MC_FUNC(2, 4, ssse3);
+ VP8_BILINEAR_MC_FUNC(0, 16, ssse3);
+ VP8_BILINEAR_MC_FUNC(1, 8, ssse3);
+ VP8_BILINEAR_MC_FUNC(2, 4, ssse3);
+ }
+#endif /* HAVE_YASM */
+}
+
+/**
+ * Install x86 SIMD IDCT and loop-filter functions for VP8 proper.
+ * Same layering scheme as ff_vp78dsp_init_x86(): later (stronger) CPU
+ * branches override earlier ones.  Vertical SSE2 filters tolerate
+ * AV_CPU_FLAG_SSE2SLOW; the horizontal ones (which need fast shuffles for
+ * the transposes) require full EXTERNAL_SSE2.
+ */
+av_cold void ff_vp8dsp_init_x86(VP8DSPContext *c)
+{
+#if HAVE_YASM
+ int cpu_flags = av_get_cpu_flags();
+
+ if (EXTERNAL_MMX(cpu_flags)) {
+ c->vp8_idct_dc_add = ff_vp8_idct_dc_add_mmx;
+ c->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_mmx;
+#if ARCH_X86_32
+ c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_mmx;
+ c->vp8_idct_add = ff_vp8_idct_add_mmx;
+ c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_mmx;
+
+ c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx;
+ c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx;
+
+ c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx;
+ c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx;
+ c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx;
+ c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx;
+
+ c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmx;
+ c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmx;
+ c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmx;
+ c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmx;
+#endif
+ }
+
+ /* note that 4-tap width=16 functions are missing because w=16
+ * is only used for luma, and luma is always a copy or sixtap. */
+ if (EXTERNAL_MMXEXT(cpu_flags)) {
+#if ARCH_X86_32
+ c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext;
+ c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext;
+
+ c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext;
+ c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext;
+ c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmxext;
+ c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmxext;
+
+ c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmxext;
+ c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_mmxext;
+ c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmxext;
+ c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_mmxext;
+#endif
+ }
+
+ if (EXTERNAL_SSE(cpu_flags)) {
+ c->vp8_idct_add = ff_vp8_idct_add_sse;
+ c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_sse;
+ }
+
+ /* vertical filters: SSE2SLOW CPUs still profit, so accept both flags */
+ if (HAVE_SSE2_EXTERNAL && cpu_flags & (AV_CPU_FLAG_SSE2 | AV_CPU_FLAG_SSE2SLOW)) {
+ c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2;
+
+ c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2;
+ c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2;
+
+ c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_sse2;
+ c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_sse2;
+ }
+
+ /* horizontal filters require full-speed SSE2 (transpose-heavy) */
+ if (EXTERNAL_SSE2(cpu_flags)) {
+ c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_sse2;
+
+ c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2;
+
+ c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2;
+ c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2;
+
+ c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse2;
+ c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse2;
+ }
+
+ if (EXTERNAL_SSSE3(cpu_flags)) {
+ c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_ssse3;
+ c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_ssse3;
+
+ c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_ssse3;
+ c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_ssse3;
+ c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_ssse3;
+ c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_ssse3;
+
+ c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_ssse3;
+ c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_ssse3;
+ c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_ssse3;
+ c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_ssse3;
+ }
+
+ if (EXTERNAL_SSE4(cpu_flags)) {
+ c->vp8_idct_dc_add = ff_vp8_idct_dc_add_sse4;
+
+ c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse4;
+ c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse4;
+ c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse4;
+ }
+#endif /* HAVE_YASM */
+}
diff --git a/media/ffvpx/libavcodec/x86/vp8dsp_loopfilter.asm b/media/ffvpx/libavcodec/x86/vp8dsp_loopfilter.asm
new file mode 100644
index 000000000..98bb6696a
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp8dsp_loopfilter.asm
@@ -0,0 +1,1584 @@
+;******************************************************************************
+;* VP8 MMXEXT optimizations
+;* Copyright (c) 2010 Ronald S. Bultje <rsbultje@gmail.com>
+;* Copyright (c) 2010 Fiona Glaser <fiona@x264.com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+; Read-only filter constants.  pw_* are word-splatted multipliers used by
+; the mbedge filter taps; pb_4/pb_F8/pb_FE are byte masks for the +-4
+; rounding, >>3 masking and /2 masking steps of filter_common; the
+; pb_*_63 pairs interleave mbedge tap weights with rounding bias 63.
+pw_27: times 8 dw 27
+pw_63: times 8 dw 63
+
+pb_4: times 16 db 4
+pb_F8: times 16 db 0xF8
+pb_FE: times 16 db 0xFE
+pb_27_63: times 8 db 27, 63
+pb_18_63: times 8 db 18, 63
+pb_9_63: times 8 db 9, 63
+
+; shared constants provided by libavcodec/x86/constants.c
+cextern pb_1
+cextern pb_3
+cextern pw_9
+cextern pw_18
+cextern pb_80
+
+SECTION .text
+
+;-----------------------------------------------------------------------------
+; void ff_vp8_h/v_loop_filter_simple_<opt>(uint8_t *dst, int stride, int flim);
+;-----------------------------------------------------------------------------
+
+; macro called with 7 mm register indexes as argument, and 4 regular registers
+;
+; first 4 mm registers will carry the transposed pixel data
+; the other three are scratchspace (one would be sufficient, but this allows
+; for more spreading/pipelining and thus faster execution on OOE CPUs)
+;
+; first two regular registers are buf+4*stride and buf+5*stride
+; third is -stride, fourth is +stride
+; %1-%4: output regs (transposed rows), %5-%7: scratch,
+; %8/%9: buf+4*stride / buf+5*stride, %10/%11: -stride / +stride.
+%macro READ_8x4_INTERLEAVED 11
+ ; interleave 8 (A-H) rows of 4 pixels each
+ movd m%1, [%8+%10*4] ; A0-3
+ movd m%5, [%9+%10*4] ; B0-3
+ movd m%2, [%8+%10*2] ; C0-3
+ movd m%6, [%8+%10] ; D0-3
+ movd m%3, [%8] ; E0-3
+ movd m%7, [%9] ; F0-3
+ movd m%4, [%9+%11] ; G0-3
+ punpcklbw m%1, m%5 ; A/B interleaved
+ movd m%5, [%9+%11*2] ; H0-3
+ punpcklbw m%2, m%6 ; C/D interleaved
+ punpcklbw m%3, m%7 ; E/F interleaved
+ punpcklbw m%4, m%5 ; G/H interleaved
+%endmacro
+
+; macro called with 7 mm register indexes as argument, and 5 regular registers
+; first 11 mean the same as READ_8x4_TRANSPOSED above
+; fifth regular register is scratchspace to reach the bottom 8 rows, it
+; will be set to second regular register + 8*stride at the end
+; 16-row variant of READ_8x4_INTERLEAVED; %12 is scratch used to address
+; rows 8..15 (left pointing at second-reg + 9*stride afterwards).
+; NOTE(review): the lea below hard-codes r0/r2 rather than %8/%10 — works
+; because all call sites pass dst1q/mstrideq, but it couples the macro to
+; the caller's register layout.
+%macro READ_16x4_INTERLEAVED 12
+ ; transpose 16 (A-P) rows of 4 pixels each
+ lea %12, [r0+8*r2]
+
+ ; read (and interleave) those addressable by %8 (=r0), A/C/D/E/I/K/L/M
+ movd m%1, [%8+%10*4] ; A0-3
+ movd m%3, [%12+%10*4] ; I0-3
+ movd m%2, [%8+%10*2] ; C0-3
+ movd m%4, [%12+%10*2] ; K0-3
+ movd m%6, [%8+%10] ; D0-3
+ movd m%5, [%12+%10] ; L0-3
+ movd m%7, [%12] ; M0-3
+ add %12, %11
+ punpcklbw m%1, m%3 ; A/I
+ movd m%3, [%8] ; E0-3
+ punpcklbw m%2, m%4 ; C/K
+ punpcklbw m%6, m%5 ; D/L
+ punpcklbw m%3, m%7 ; E/M
+ punpcklbw m%2, m%6 ; C/D/K/L interleaved
+
+ ; read (and interleave) those addressable by %9 (=r4), B/F/G/H/J/N/O/P
+ movd m%5, [%9+%10*4] ; B0-3
+ movd m%4, [%12+%10*4] ; J0-3
+ movd m%7, [%9] ; F0-3
+ movd m%6, [%12] ; N0-3
+ punpcklbw m%5, m%4 ; B/J
+ punpcklbw m%7, m%6 ; F/N
+ punpcklbw m%1, m%5 ; A/B/I/J interleaved
+ punpcklbw m%3, m%7 ; E/F/M/N interleaved
+ movd m%4, [%9+%11] ; G0-3
+ movd m%6, [%12+%11] ; O0-3
+ movd m%5, [%9+%11*2] ; H0-3
+ movd m%7, [%12+%11*2] ; P0-3
+ punpcklbw m%4, m%6 ; G/O
+ punpcklbw m%5, m%7 ; H/P
+ punpcklbw m%4, m%5 ; G/H/O/P interleaved
+%endmacro
+
+; write 4 mm registers of 2 dwords each
+; first four arguments are mm register indexes containing source data
+; last four are registers containing buf+4*stride, buf+5*stride,
+; -stride and +stride
+; Scatter the two dwords of each of 4 mm regs back into 8 pixel rows
+; (inverse of READ_8x4_INTERLEAVED's addressing scheme).
+%macro WRITE_4x2D 8
+ ; write out (2 dwords per register)
+ movd [%5+%7*4], m%1
+ movd [%5+%7*2], m%2
+ movd [%5], m%3
+ movd [%6+%8], m%4
+ punpckhdq m%1, m%1 ; move high dword into low position
+ punpckhdq m%2, m%2
+ punpckhdq m%3, m%3
+ punpckhdq m%4, m%4
+ movd [%6+%7*4], m%1
+ movd [%5+%7], m%2
+ movd [%6], m%3
+ movd [%6+%8*2], m%4
+%endmacro
+
+; write 4 xmm registers of 4 dwords each
+; arguments same as WRITE_2x4D, but with an extra register, so that the 5 regular
+; registers contain buf+4*stride, buf+5*stride, buf+12*stride, -stride and +stride
+; we add 1*stride to the third regular registry in the process
+; the 10th argument is 16 if it's a Y filter (i.e. all regular registers cover the
+; same memory region), or 8 if they cover two separate buffers (third one points to
+; a different memory region than the first two), allowing for more optimal code for
+; the 16-width case
+; Scatter 4 dwords per xmm register into 16 rows.  In the %10==8 (chroma)
+; case, %5 doubles as a GPR spill slot for m%3's dword 2 (movd %5d / later
+; mov [..], %5d) because the two 8-px halves live in separate buffers.
+%macro WRITE_4x4D 10
+ ; write out (4 dwords per register), start with dwords zero
+ movd [%5+%8*4], m%1
+ movd [%5], m%2
+ movd [%7+%8*4], m%3
+ movd [%7], m%4
+
+ ; store dwords 1
+ psrldq m%1, 4
+ psrldq m%2, 4
+ psrldq m%3, 4
+ psrldq m%4, 4
+ movd [%6+%8*4], m%1
+ movd [%6], m%2
+%if %10 == 16
+ movd [%6+%9*4], m%3
+%endif
+ movd [%7+%9], m%4
+
+ ; write dwords 2
+ psrldq m%1, 4
+ psrldq m%2, 4
+%if %10 == 8
+ movd [%5+%8*2], m%1
+ movd %5d, m%3 ; spill dword 2 of m%3 into the GPR
+%endif
+ psrldq m%3, 4
+ psrldq m%4, 4
+%if %10 == 16
+ movd [%5+%8*2], m%1
+%endif
+ movd [%6+%9], m%2
+ movd [%7+%8*2], m%3
+ movd [%7+%9*2], m%4
+ add %7, %9
+
+ ; store dwords 3
+ psrldq m%1, 4
+ psrldq m%2, 4
+ psrldq m%3, 4
+ psrldq m%4, 4
+%if %10 == 8
+ mov [%7+%8*4], %5d ; flush the spilled dword
+ movd [%6+%8*2], m%1
+%else
+ movd [%5+%8], m%1
+%endif
+ movd [%6+%9*2], m%2
+ movd [%7+%8*2], m%3
+ movd [%7+%9*2], m%4
+%endmacro
+
+; write 4 or 8 words in the mmx/xmm registers as 8 lines
+; 1 and 2 are the registers to write, this can be the same (for SSE2)
+; for pre-SSE4:
+; 3 is a general-purpose register that we will clobber
+; for SSE4:
+; 3 is a pointer to the destination's 5th line
+; 4 is a pointer to the destination's 4th line
+; 5/6 is -stride and +stride
+; mmx variant: store 2x4 words as 8 rows via a clobbered GPR (%3),
+; 16 bits at a time.  %4 is advanced across rows and restored net-zero
+; by the final add.
+%macro WRITE_2x4W 6
+ movd %3d, %1
+ punpckhdq %1, %1
+ mov [%4+%5*4], %3w
+ shr %3, 16
+ add %4, %6
+ mov [%4+%5*4], %3w
+
+ movd %3d, %1
+ add %4, %5
+ mov [%4+%5*2], %3w
+ shr %3, 16
+ mov [%4+%5 ], %3w
+
+ movd %3d, %2
+ punpckhdq %2, %2
+ mov [%4 ], %3w
+ shr %3, 16
+ mov [%4+%6 ], %3w
+
+ movd %3d, %2
+ add %4, %6
+ mov [%4+%6 ], %3w
+ shr %3, 16
+ mov [%4+%5 ], %3w ; NOTE(review): %5 here vs %6*2 in WRITE_8W — verify against upstream
+ add %4, %5
+%endmacro
+
+; Store 8 words of %1 as 8 rows.  SSE4 path uses pextrw straight to
+; memory; pre-SSE4 falls back to GPR word stores like WRITE_2x4W.
+%macro WRITE_8W 5
+%if cpuflag(sse4)
+ pextrw [%3+%4*4], %1, 0
+ pextrw [%2+%4*4], %1, 1
+ pextrw [%3+%4*2], %1, 2
+ pextrw [%3+%4 ], %1, 3
+ pextrw [%3 ], %1, 4
+ pextrw [%2 ], %1, 5
+ pextrw [%2+%5 ], %1, 6
+ pextrw [%2+%5*2], %1, 7
+%else
+ movd %2d, %1
+ psrldq %1, 4
+ mov [%3+%4*4], %2w
+ shr %2, 16
+ add %3, %5
+ mov [%3+%4*4], %2w
+
+ movd %2d, %1
+ psrldq %1, 4
+ add %3, %4
+ mov [%3+%4*2], %2w
+ shr %2, 16
+ mov [%3+%4 ], %2w
+
+ movd %2d, %1
+ psrldq %1, 4
+ mov [%3 ], %2w
+ shr %2, 16
+ mov [%3+%5 ], %2w
+
+ movd %2d, %1
+ add %3, %5
+ mov [%3+%5 ], %2w
+ shr %2, 16
+ mov [%3+%5*2], %2w
+%endif
+%endmacro
+
+; Simple loop filter: %1 = direction (h/v), %2 = GPR count for cglobal.
+; Implements VP8's simple_limit test (|p0-q0|*2 + |p1-q1|/2 <= flim)
+; followed by filter_common on p0/q0 only.  mmx builds loop twice over
+; 8-px halves; sse2+ processes all 16 px at once.  The h path transposes
+; 4 columns in, filters, and transposes 2 columns back out.
+%macro SIMPLE_LOOPFILTER 2
+cglobal vp8_%1_loop_filter_simple, 3, %2, 8, dst, stride, flim, cntr
+%if mmsize == 8 ; mmx/mmxext
+ mov cntrq, 2
+%endif
+%if cpuflag(ssse3)
+ pxor m0, m0
+%endif
+ SPLATB_REG m7, flim, m0 ; splat "flim" into register
+
+ ; set up indexes to address 4 rows
+%if mmsize == 8
+ DEFINE_ARGS dst1, mstride, stride, cntr, dst2
+%else
+ DEFINE_ARGS dst1, mstride, stride, dst3, dst2
+%endif
+ mov strideq, mstrideq
+ neg mstrideq
+%ifidn %1, h
+ lea dst1q, [dst1q+4*strideq-2]
+%endif
+
+%if mmsize == 8 ; mmx / mmxext
+.next8px:
+%endif
+%ifidn %1, v
+ ; read 4 half/full rows of pixels
+ mova m0, [dst1q+mstrideq*2] ; p1
+ mova m1, [dst1q+mstrideq] ; p0
+ mova m2, [dst1q] ; q0
+ mova m3, [dst1q+ strideq] ; q1
+%else ; h
+ lea dst2q, [dst1q+ strideq]
+
+%if mmsize == 8 ; mmx/mmxext
+ READ_8x4_INTERLEAVED 0, 1, 2, 3, 4, 5, 6, dst1q, dst2q, mstrideq, strideq
+%else ; sse2
+ READ_16x4_INTERLEAVED 0, 1, 2, 3, 4, 5, 6, dst1q, dst2q, mstrideq, strideq, dst3q
+%endif
+ TRANSPOSE4x4W 0, 1, 2, 3, 4
+%endif
+
+ ; simple_limit
+ mova m5, m2 ; m5=backup of q0
+ mova m6, m1 ; m6=backup of p0
+ psubusb m1, m2 ; p0-q0
+ psubusb m2, m6 ; q0-p0
+ por m1, m2 ; FFABS(p0-q0)
+ paddusb m1, m1 ; m1=FFABS(p0-q0)*2
+
+ mova m4, m3
+ mova m2, m0
+ psubusb m3, m0 ; q1-p1
+ psubusb m0, m4 ; p1-q1
+ por m3, m0 ; FFABS(p1-q1)
+ mova m0, [pb_80]
+ pxor m2, m0
+ pxor m4, m0
+ psubsb m2, m4 ; m2=p1-q1 (signed) backup for below
+ pand m3, [pb_FE]
+ psrlq m3, 1 ; m3=FFABS(p1-q1)/2, this can be used signed
+ paddusb m3, m1
+ psubusb m3, m7
+ pxor m1, m1
+ pcmpeqb m3, m1 ; abs(p0-q0)*2+abs(p1-q1)/2<=flim mask(0xff/0x0)
+
+ ; filter_common (use m2/p1-q1, m4=q0, m6=p0, m5/q0-p0 and m3/mask)
+ mova m4, m5
+ pxor m5, m0
+ pxor m0, m6
+ psubsb m5, m0 ; q0-p0 (signed)
+ paddsb m2, m5
+ paddsb m2, m5
+ paddsb m2, m5 ; a=(p1-q1) + 3*(q0-p0)
+ pand m2, m3 ; apply filter mask (m3)
+
+ mova m3, [pb_F8]
+ mova m1, m2
+ paddsb m2, [pb_4] ; f1<<3=a+4
+ paddsb m1, [pb_3] ; f2<<3=a+3
+ pand m2, m3
+ pand m1, m3 ; cache f2<<3
+
+ ; signed >>3 emulated with separate positive/negative paths
+ pxor m0, m0
+ pxor m3, m3
+ pcmpgtb m0, m2 ; which values are <0?
+ psubb m3, m2 ; -f1<<3
+ psrlq m2, 3 ; +f1
+ psrlq m3, 3 ; -f1
+ pand m3, m0
+ pandn m0, m2
+ psubusb m4, m0
+ paddusb m4, m3 ; q0-f1
+
+ pxor m0, m0
+ pxor m3, m3
+ pcmpgtb m0, m1 ; which values are <0?
+ psubb m3, m1 ; -f2<<3
+ psrlq m1, 3 ; +f2
+ psrlq m3, 3 ; -f2
+ pand m3, m0
+ pandn m0, m1
+ paddusb m6, m0
+ psubusb m6, m3 ; p0+f2
+
+ ; store
+%ifidn %1, v
+ mova [dst1q], m4
+ mova [dst1q+mstrideq], m6
+%else ; h
+ inc dst1q
+ SBUTTERFLY bw, 6, 4, 0
+
+%if mmsize == 16 ; sse2
+%if cpuflag(sse4)
+ inc dst2q
+%endif
+ WRITE_8W m6, dst2q, dst1q, mstrideq, strideq
+ lea dst2q, [dst3q+mstrideq+1]
+%if cpuflag(sse4)
+ inc dst3q
+%endif
+ WRITE_8W m4, dst3q, dst2q, mstrideq, strideq
+%else ; mmx/mmxext
+ WRITE_2x4W m6, m4, dst2q, dst1q, mstrideq, strideq
+%endif
+%endif
+
+%if mmsize == 8 ; mmx/mmxext
+ ; next 8 pixels
+%ifidn %1, v
+ add dst1q, 8 ; advance 8 cols = pixels
+%else ; h
+ lea dst1q, [dst1q+strideq*8-1] ; advance 8 rows = lines
+%endif
+ dec cntrq
+ jg .next8px
+ REP_RET
+%else ; sse2
+ RET
+%endif
+%endmacro
+
+; Instantiate the simple filters.  mmx/mmxext only on 32-bit (64-bit uses
+; the SSE2+ versions); no sse4 'v' — vertical gains nothing from pextrw.
+%if ARCH_X86_32
+INIT_MMX mmx
+SIMPLE_LOOPFILTER v, 4
+SIMPLE_LOOPFILTER h, 5
+INIT_MMX mmxext
+SIMPLE_LOOPFILTER v, 4
+SIMPLE_LOOPFILTER h, 5
+%endif
+
+INIT_XMM sse2
+SIMPLE_LOOPFILTER v, 3
+SIMPLE_LOOPFILTER h, 5
+INIT_XMM ssse3
+SIMPLE_LOOPFILTER v, 3
+SIMPLE_LOOPFILTER h, 5
+INIT_XMM sse4
+SIMPLE_LOOPFILTER h, 5
+
+;-----------------------------------------------------------------------------
+; void ff_vp8_h/v_loop_filter<size>_inner_<opt>(uint8_t *dst, [uint8_t *v,] int stride,
+; int flimE, int flimI, int hev_thr);
+;-----------------------------------------------------------------------------
+
+; Inner-edge loop filter: %1 = direction (h/v), %2 = 16 (luma) or 8
+; (chroma pair, U and V filtered in one call via dst/dst8).
+; Implements VP8 normal_limit + hev, then a 4-tap filter_common touching
+; p1/p0/q0/q1.  On x86-64 sse2+ the E/I/hev splat values and backups live
+; in m8-m12; elsewhere they spill to the stack (m_* defines).
+%macro INNER_LOOPFILTER 2
+%define stack_size 0
+%ifndef m8 ; stack layout: [0]=E, [1]=I, [2]=hev_thr
+%ifidn %1, v ; [3]=hev() result
+%define stack_size mmsize * -4
+%else ; h ; extra storage space for transposes
+%define stack_size mmsize * -5
+%endif
+%endif
+
+%if %2 == 8 ; chroma
+cglobal vp8_%1_loop_filter8uv_inner, 6, 6, 13, stack_size, dst, dst8, stride, flimE, flimI, hevthr
+%else ; luma
+cglobal vp8_%1_loop_filter16y_inner, 5, 5, 13, stack_size, dst, stride, flimE, flimI, hevthr
+%endif
+
+%if cpuflag(ssse3)
+ pxor m7, m7
+%endif
+
+%ifndef m8
+ ; splat function arguments
+ SPLATB_REG m0, flimEq, m7 ; E
+ SPLATB_REG m1, flimIq, m7 ; I
+ SPLATB_REG m2, hevthrq, m7 ; hev_thresh
+
+%define m_flimE [rsp]
+%define m_flimI [rsp+mmsize]
+%define m_hevthr [rsp+mmsize*2]
+%define m_maskres [rsp+mmsize*3]
+%define m_p0backup [rsp+mmsize*3]
+%define m_q0backup [rsp+mmsize*4]
+
+ mova m_flimE, m0
+ mova m_flimI, m1
+ mova m_hevthr, m2
+%else
+%define m_flimE m9
+%define m_flimI m10
+%define m_hevthr m11
+%define m_maskres m12
+%define m_p0backup m12
+%define m_q0backup m8
+
+ ; splat function arguments
+ SPLATB_REG m_flimE, flimEq, m7 ; E
+ SPLATB_REG m_flimI, flimIq, m7 ; I
+ SPLATB_REG m_hevthr, hevthrq, m7 ; hev_thresh
+%endif
+
+%if %2 == 8 ; chroma
+ DEFINE_ARGS dst1, dst8, mstride, stride, dst2
+%elif mmsize == 8
+ DEFINE_ARGS dst1, mstride, stride, dst2, cntr
+ mov cntrq, 2
+%else
+ DEFINE_ARGS dst1, mstride, stride, dst2, dst8
+%endif
+ mov strideq, mstrideq
+ neg mstrideq
+%ifidn %1, h
+ lea dst1q, [dst1q+strideq*4-4]
+%if %2 == 8 ; chroma
+ lea dst8q, [dst8q+strideq*4-4]
+%endif
+%endif
+
+%if mmsize == 8
+.next8px:
+%endif
+ ; read
+ lea dst2q, [dst1q+strideq]
+%ifidn %1, v
+%if %2 == 8 && mmsize == 16
+%define movrow movh ; chroma on sse2: 8 px per plane -> half loads
+%else
+%define movrow mova
+%endif
+ movrow m0, [dst1q+mstrideq*4] ; p3
+ movrow m1, [dst2q+mstrideq*4] ; p2
+ movrow m2, [dst1q+mstrideq*2] ; p1
+ movrow m5, [dst2q] ; q1
+ movrow m6, [dst2q+ strideq*1] ; q2
+ movrow m7, [dst2q+ strideq*2] ; q3
+%if mmsize == 16 && %2 == 8
+ ; V plane rows go into the high halves of the same registers
+ movhps m0, [dst8q+mstrideq*4]
+ movhps m2, [dst8q+mstrideq*2]
+ add dst8q, strideq
+ movhps m1, [dst8q+mstrideq*4]
+ movhps m5, [dst8q]
+ movhps m6, [dst8q+ strideq ]
+ movhps m7, [dst8q+ strideq*2]
+ add dst8q, mstrideq
+%endif
+%elif mmsize == 8 ; mmx/mmxext (h)
+ ; read 8 rows of 8px each
+ movu m0, [dst1q+mstrideq*4]
+ movu m1, [dst2q+mstrideq*4]
+ movu m2, [dst1q+mstrideq*2]
+ movu m3, [dst1q+mstrideq ]
+ movu m4, [dst1q]
+ movu m5, [dst2q]
+ movu m6, [dst2q+ strideq ]
+
+ ; 8x8 transpose
+ TRANSPOSE4x4B 0, 1, 2, 3, 7
+ mova m_q0backup, m1
+ movu m7, [dst2q+ strideq*2]
+ TRANSPOSE4x4B 4, 5, 6, 7, 1
+ SBUTTERFLY dq, 0, 4, 1 ; p3/p2
+ SBUTTERFLY dq, 2, 6, 1 ; q0/q1
+ SBUTTERFLY dq, 3, 7, 1 ; q2/q3
+ mova m1, m_q0backup
+ mova m_q0backup, m2 ; store q0
+ SBUTTERFLY dq, 1, 5, 2 ; p1/p0
+ mova m_p0backup, m5 ; store p0
+ SWAP 1, 4
+ SWAP 2, 4
+ SWAP 6, 3
+ SWAP 5, 3
+%else ; sse2 (h)
+%if %2 == 16
+ lea dst8q, [dst1q+ strideq*8]
+%endif
+
+ ; read 16 rows of 8px each, interleave
+ movh m0, [dst1q+mstrideq*4]
+ movh m1, [dst8q+mstrideq*4]
+ movh m2, [dst1q+mstrideq*2]
+ movh m5, [dst8q+mstrideq*2]
+ movh m3, [dst1q+mstrideq ]
+ movh m6, [dst8q+mstrideq ]
+ movh m4, [dst1q]
+ movh m7, [dst8q]
+ punpcklbw m0, m1 ; A/I
+ punpcklbw m2, m5 ; C/K
+ punpcklbw m3, m6 ; D/L
+ punpcklbw m4, m7 ; E/M
+
+ add dst8q, strideq
+ movh m1, [dst2q+mstrideq*4]
+ movh m6, [dst8q+mstrideq*4]
+ movh m5, [dst2q]
+ movh m7, [dst8q]
+ punpcklbw m1, m6 ; B/J
+ punpcklbw m5, m7 ; F/N
+ movh m6, [dst2q+ strideq ]
+ movh m7, [dst8q+ strideq ]
+ punpcklbw m6, m7 ; G/O
+
+ ; 8x16 transpose
+ TRANSPOSE4x4B 0, 1, 2, 3, 7
+%ifdef m8
+ SWAP 1, 8
+%else
+ mova m_q0backup, m1
+%endif
+ movh m7, [dst2q+ strideq*2]
+ movh m1, [dst8q+ strideq*2]
+ punpcklbw m7, m1 ; H/P
+ TRANSPOSE4x4B 4, 5, 6, 7, 1
+ SBUTTERFLY dq, 0, 4, 1 ; p3/p2
+ SBUTTERFLY dq, 2, 6, 1 ; q0/q1
+ SBUTTERFLY dq, 3, 7, 1 ; q2/q3
+%ifdef m8
+ SWAP 1, 8
+ SWAP 2, 8
+%else
+ mova m1, m_q0backup
+ mova m_q0backup, m2 ; store q0
+%endif
+ SBUTTERFLY dq, 1, 5, 2 ; p1/p0
+%ifdef m12
+ SWAP 5, 12
+%else
+ mova m_p0backup, m5 ; store p0
+%endif
+ SWAP 1, 4
+ SWAP 2, 4
+ SWAP 6, 3
+ SWAP 5, 3
+%endif
+
+ ; normal_limit for p3-p2, p2-p1, q3-q2 and q2-q1
+ mova m4, m1
+ SWAP 4, 1
+ psubusb m4, m0 ; p2-p3
+ psubusb m0, m1 ; p3-p2
+ por m0, m4 ; abs(p3-p2)
+
+ mova m4, m2
+ SWAP 4, 2
+ psubusb m4, m1 ; p1-p2
+ psubusb m1, m2 ; p2-p1
+ por m1, m4 ; abs(p2-p1)
+
+ mova m4, m6
+ SWAP 4, 6
+ psubusb m4, m7 ; q2-q3
+ psubusb m7, m6 ; q3-q2
+ por m7, m4 ; abs(q3-q2)
+
+ mova m4, m5
+ SWAP 4, 5
+ psubusb m4, m6 ; q1-q2
+ psubusb m6, m5 ; q2-q1
+ por m6, m4 ; abs(q2-q1)
+
+%if notcpuflag(mmxext)
+ ; no pmaxub on plain mmx: compare each diff against I individually
+ mova m4, m_flimI
+ pxor m3, m3
+ psubusb m0, m4
+ psubusb m1, m4
+ psubusb m7, m4
+ psubusb m6, m4
+ pcmpeqb m0, m3 ; abs(p3-p2) <= I
+ pcmpeqb m1, m3 ; abs(p2-p1) <= I
+ pcmpeqb m7, m3 ; abs(q3-q2) <= I
+ pcmpeqb m6, m3 ; abs(q2-q1) <= I
+ pand m0, m1
+ pand m7, m6
+ pand m0, m7
+%else ; mmxext/sse2
+ ; with pmaxub, fold all diffs into one max and compare once later
+ pmaxub m0, m1
+ pmaxub m6, m7
+ pmaxub m0, m6
+%endif
+
+ ; normal_limit and high_edge_variance for p1-p0, q1-q0
+ SWAP 7, 3 ; now m7 is zero
+%ifidn %1, v
+ movrow m3, [dst1q+mstrideq ] ; p0
+%if mmsize == 16 && %2 == 8
+ movhps m3, [dst8q+mstrideq ]
+%endif
+%elifdef m12
+ SWAP 3, 12
+%else
+ mova m3, m_p0backup
+%endif
+
+ mova m1, m2
+ SWAP 1, 2
+ mova m6, m3
+ SWAP 3, 6
+ psubusb m1, m3 ; p1-p0
+ psubusb m6, m2 ; p0-p1
+ por m1, m6 ; abs(p1-p0)
+%if notcpuflag(mmxext)
+ mova m6, m1
+ psubusb m1, m4
+ psubusb m6, m_hevthr
+ pcmpeqb m1, m7 ; abs(p1-p0) <= I
+ pcmpeqb m6, m7 ; abs(p1-p0) <= hev_thresh
+ pand m0, m1
+ mova m_maskres, m6
+%else ; mmxext/sse2
+ pmaxub m0, m1 ; max_I
+ SWAP 1, 4 ; max_hev_thresh
+%endif
+
+ SWAP 6, 4 ; now m6 is I
+%ifidn %1, v
+ movrow m4, [dst1q] ; q0
+%if mmsize == 16 && %2 == 8
+ movhps m4, [dst8q]
+%endif
+%elifdef m8
+ SWAP 4, 8
+%else
+ mova m4, m_q0backup
+%endif
+ mova m1, m4
+ SWAP 1, 4
+ mova m7, m5
+ SWAP 7, 5
+ psubusb m1, m5 ; q0-q1
+ psubusb m7, m4 ; q1-q0
+ por m1, m7 ; abs(q1-q0)
+%if notcpuflag(mmxext)
+ mova m7, m1
+ psubusb m1, m6
+ psubusb m7, m_hevthr
+ pxor m6, m6
+ pcmpeqb m1, m6 ; abs(q1-q0) <= I
+ pcmpeqb m7, m6 ; abs(q1-q0) <= hev_thresh
+ mova m6, m_maskres
+ pand m0, m1 ; abs([pq][321]-[pq][210]) <= I
+ pand m6, m7
+%else ; mmxext/sse2
+ pxor m7, m7
+ pmaxub m0, m1
+ pmaxub m6, m1
+ psubusb m0, m_flimI
+ psubusb m6, m_hevthr
+ pcmpeqb m0, m7 ; max(abs(..)) <= I
+ pcmpeqb m6, m7 ; !(max(abs..) > thresh)
+%endif
+%ifdef m12
+ SWAP 6, 12
+%else
+ mova m_maskres, m6 ; !(abs(p1-p0) > hev_t || abs(q1-q0) > hev_t)
+%endif
+
+ ; simple_limit
+ mova m1, m3
+ SWAP 1, 3
+ mova m6, m4 ; keep copies of p0/q0 around for later use
+ SWAP 6, 4
+ psubusb m1, m4 ; p0-q0
+ psubusb m6, m3 ; q0-p0
+ por m1, m6 ; abs(q0-p0)
+ paddusb m1, m1 ; m1=2*abs(q0-p0)
+
+ mova m7, m2
+ SWAP 7, 2
+ mova m6, m5
+ SWAP 6, 5
+ psubusb m7, m5 ; p1-q1
+ psubusb m6, m2 ; q1-p1
+ por m7, m6 ; abs(q1-p1)
+ pxor m6, m6
+ pand m7, [pb_FE]
+ psrlq m7, 1 ; abs(q1-p1)/2
+ paddusb m7, m1 ; abs(q0-p0)*2+abs(q1-p1)/2
+ psubusb m7, m_flimE
+ pcmpeqb m7, m6 ; abs(q0-p0)*2+abs(q1-p1)/2 <= E
+ pand m0, m7 ; normal_limit result
+
+ ; filter_common; at this point, m2-m5=p1-q1 and m0 is filter_mask
+%ifdef m8 ; x86-64 && sse2
+ mova m8, [pb_80]
+%define m_pb_80 m8
+%else ; x86-32 or mmx/mmxext
+%define m_pb_80 [pb_80]
+%endif
+ mova m1, m4
+ mova m7, m3
+ pxor m1, m_pb_80
+ pxor m7, m_pb_80
+ psubsb m1, m7 ; (signed) q0-p0
+ mova m6, m2
+ mova m7, m5
+ pxor m6, m_pb_80
+ pxor m7, m_pb_80
+ psubsb m6, m7 ; (signed) p1-q1
+ mova m7, m_maskres
+ pandn m7, m6
+ paddsb m7, m1
+ paddsb m7, m1
+ paddsb m7, m1 ; 3*(q0-p0)+is4tap?(p1-q1)
+
+ pand m7, m0
+ mova m1, [pb_F8]
+ mova m6, m7
+ paddsb m7, [pb_3]
+ paddsb m6, [pb_4]
+ pand m7, m1
+ pand m6, m1
+
+ ; signed >>3 via separate positive/negative paths (no psrab insn)
+ pxor m1, m1
+ pxor m0, m0
+ pcmpgtb m1, m7
+ psubb m0, m7
+ psrlq m7, 3 ; +f2
+ psrlq m0, 3 ; -f2
+ pand m0, m1
+ pandn m1, m7
+ psubusb m3, m0
+ paddusb m3, m1 ; p0+f2
+
+ pxor m1, m1
+ pxor m0, m0
+ pcmpgtb m0, m6
+ psubb m1, m6
+ psrlq m6, 3 ; +f1
+ psrlq m1, 3 ; -f1
+ pand m1, m0
+ pandn m0, m6
+ psubusb m4, m0
+ paddusb m4, m1 ; q0-f1
+
+%ifdef m12
+ SWAP 6, 12
+%else
+ mova m6, m_maskres
+%endif
+%if notcpuflag(mmxext)
+ mova m7, [pb_1]
+%else ; mmxext/sse2
+ pxor m7, m7
+%endif
+ ; a = (f1+1)>>1, applied to p1/q1 only where !hev (mask in m6);
+ ; pavgb computes the rounded average against zero on mmxext+
+ pand m0, m6
+ pand m1, m6
+%if notcpuflag(mmxext)
+ paddusb m0, m7
+ pand m1, [pb_FE]
+ pandn m7, m0
+ psrlq m1, 1
+ psrlq m7, 1
+ SWAP 0, 7
+%else ; mmxext/sse2
+ psubusb m1, [pb_1]
+ pavgb m0, m7 ; a
+ pavgb m1, m7 ; -a
+%endif
+ psubusb m5, m0
+ psubusb m2, m1
+ paddusb m5, m1 ; q1-a
+ paddusb m2, m0 ; p1+a
+
+ ; store
+%ifidn %1, v
+ movrow [dst1q+mstrideq*2], m2
+ movrow [dst1q+mstrideq ], m3
+ movrow [dst1q], m4
+ movrow [dst1q+ strideq ], m5
+%if mmsize == 16 && %2 == 8
+ movhps [dst8q+mstrideq*2], m2
+ movhps [dst8q+mstrideq ], m3
+ movhps [dst8q], m4
+ movhps [dst8q+ strideq ], m5
+%endif
+%else ; h
+ add dst1q, 2
+ add dst2q, 2
+
+ ; 4x8/16 transpose
+ TRANSPOSE4x4B 2, 3, 4, 5, 6
+
+%if mmsize == 8 ; mmx/mmxext (h)
+ WRITE_4x2D 2, 3, 4, 5, dst1q, dst2q, mstrideq, strideq
+%else ; sse2 (h)
+ lea dst8q, [dst8q+mstrideq +2]
+ WRITE_4x4D 2, 3, 4, 5, dst1q, dst2q, dst8q, mstrideq, strideq, %2
+%endif
+%endif
+
+%if mmsize == 8
+%if %2 == 8 ; chroma
+%ifidn %1, h
+ sub dst1q, 2
+%endif
+ ; first pass did U (dst1), second does V (dst8); loop exactly twice
+ cmp dst1q, dst8q
+ mov dst1q, dst8q
+ jnz .next8px
+%else
+%ifidn %1, h
+ lea dst1q, [dst1q+ strideq*8-2]
+%else ; v
+ add dst1q, 8
+%endif
+ dec cntrq
+ jg .next8px
+%endif
+ REP_RET
+%else ; mmsize == 16
+ RET
+%endif
+%endmacro
+
+; Instantiate inner filters: mmx/mmxext only for 32-bit hosts, sse2 and
+; ssse3 everywhere, each in v/h and luma(16)/chroma(8) variants.
+%if ARCH_X86_32
+INIT_MMX mmx
+INNER_LOOPFILTER v, 16
+INNER_LOOPFILTER h, 16
+INNER_LOOPFILTER v, 8
+INNER_LOOPFILTER h, 8
+
+INIT_MMX mmxext
+INNER_LOOPFILTER v, 16
+INNER_LOOPFILTER h, 16
+INNER_LOOPFILTER v, 8
+INNER_LOOPFILTER h, 8
+%endif
+
+INIT_XMM sse2
+INNER_LOOPFILTER v, 16
+INNER_LOOPFILTER h, 16
+INNER_LOOPFILTER v, 8
+INNER_LOOPFILTER h, 8
+
+INIT_XMM ssse3
+INNER_LOOPFILTER v, 16
+INNER_LOOPFILTER h, 16
+INNER_LOOPFILTER v, 8
+INNER_LOOPFILTER h, 8
+
+;-----------------------------------------------------------------------------
+; void ff_vp8_h/v_loop_filter<size>_mbedge_<opt>(uint8_t *dst, [uint8_t *v,] int stride,
+; int flimE, int flimI, int hev_thr);
+;-----------------------------------------------------------------------------
+
+%macro MBEDGE_LOOPFILTER 2
+%define stack_size 0
+%ifndef m8 ; stack layout: [0]=E, [1]=I, [2]=hev_thr
+%if mmsize == 16 ; [3]=hev() result
+ ; [4]=filter tmp result
+ ; [5]/[6] = p2/q2 backup
+ ; [7]=lim_res sign result
+%define stack_size mmsize * -7
+%else ; 8 ; extra storage space for transposes
+%define stack_size mmsize * -8
+%endif
+%endif
+
+%if %2 == 8 ; chroma
+cglobal vp8_%1_loop_filter8uv_mbedge, 6, 6, 15, stack_size, dst1, dst8, stride, flimE, flimI, hevthr
+%else ; luma
+cglobal vp8_%1_loop_filter16y_mbedge, 5, 5, 15, stack_size, dst1, stride, flimE, flimI, hevthr
+%endif
+
+%if cpuflag(ssse3)
+ pxor m7, m7
+%endif
+
+%ifndef m8
+ ; splat function arguments
+ SPLATB_REG m0, flimEq, m7 ; E
+ SPLATB_REG m1, flimIq, m7 ; I
+ SPLATB_REG m2, hevthrq, m7 ; hev_thresh
+
+%define m_flimE [rsp]
+%define m_flimI [rsp+mmsize]
+%define m_hevthr [rsp+mmsize*2]
+%define m_maskres [rsp+mmsize*3]
+%define m_limres [rsp+mmsize*4]
+%define m_p0backup [rsp+mmsize*3]
+%define m_q0backup [rsp+mmsize*4]
+%define m_p2backup [rsp+mmsize*5]
+%define m_q2backup [rsp+mmsize*6]
+%if mmsize == 16
+%define m_limsign [rsp]
+%else
+%define m_limsign [rsp+mmsize*7]
+%endif
+
+ mova m_flimE, m0
+ mova m_flimI, m1
+ mova m_hevthr, m2
+%else ; sse2 on x86-64
+%define m_flimE m9
+%define m_flimI m10
+%define m_hevthr m11
+%define m_maskres m12
+%define m_limres m8
+%define m_p0backup m12
+%define m_q0backup m8
+%define m_p2backup m13
+%define m_q2backup m14
+%define m_limsign m9
+
+ ; splat function arguments
+ SPLATB_REG m_flimE, flimEq, m7 ; E
+ SPLATB_REG m_flimI, flimIq, m7 ; I
+ SPLATB_REG m_hevthr, hevthrq, m7 ; hev_thresh
+%endif
+
+%if %2 == 8 ; chroma
+ DEFINE_ARGS dst1, dst8, mstride, stride, dst2
+%elif mmsize == 8
+ DEFINE_ARGS dst1, mstride, stride, dst2, cntr
+ mov cntrq, 2
+%else
+ DEFINE_ARGS dst1, mstride, stride, dst2, dst8
+%endif
+ mov strideq, mstrideq
+ neg mstrideq
+%ifidn %1, h
+ lea dst1q, [dst1q+strideq*4-4]
+%if %2 == 8 ; chroma
+ lea dst8q, [dst8q+strideq*4-4]
+%endif
+%endif
+
+%if mmsize == 8
+.next8px:
+%endif
+ ; read
+ lea dst2q, [dst1q+ strideq ]
+%ifidn %1, v
+%if %2 == 8 && mmsize == 16
+%define movrow movh
+%else
+%define movrow mova
+%endif
+ movrow m0, [dst1q+mstrideq*4] ; p3
+ movrow m1, [dst2q+mstrideq*4] ; p2
+ movrow m2, [dst1q+mstrideq*2] ; p1
+ movrow m5, [dst2q] ; q1
+ movrow m6, [dst2q+ strideq ] ; q2
+ movrow m7, [dst2q+ strideq*2] ; q3
+%if mmsize == 16 && %2 == 8
+ movhps m0, [dst8q+mstrideq*4]
+ movhps m2, [dst8q+mstrideq*2]
+ add dst8q, strideq
+ movhps m1, [dst8q+mstrideq*4]
+ movhps m5, [dst8q]
+ movhps m6, [dst8q+ strideq ]
+ movhps m7, [dst8q+ strideq*2]
+ add dst8q, mstrideq
+%endif
+%elif mmsize == 8 ; mmx/mmxext (h)
+ ; read 8 rows of 8px each
+ movu m0, [dst1q+mstrideq*4]
+ movu m1, [dst2q+mstrideq*4]
+ movu m2, [dst1q+mstrideq*2]
+ movu m3, [dst1q+mstrideq ]
+ movu m4, [dst1q]
+ movu m5, [dst2q]
+ movu m6, [dst2q+ strideq ]
+
+ ; 8x8 transpose
+ TRANSPOSE4x4B 0, 1, 2, 3, 7
+ mova m_q0backup, m1
+ movu m7, [dst2q+ strideq*2]
+ TRANSPOSE4x4B 4, 5, 6, 7, 1
+ SBUTTERFLY dq, 0, 4, 1 ; p3/p2
+ SBUTTERFLY dq, 2, 6, 1 ; q0/q1
+ SBUTTERFLY dq, 3, 7, 1 ; q2/q3
+ mova m1, m_q0backup
+ mova m_q0backup, m2 ; store q0
+ SBUTTERFLY dq, 1, 5, 2 ; p1/p0
+ mova m_p0backup, m5 ; store p0
+ SWAP 1, 4
+ SWAP 2, 4
+ SWAP 6, 3
+ SWAP 5, 3
+%else ; sse2 (h)
+%if %2 == 16
+ lea dst8q, [dst1q+ strideq*8 ]
+%endif
+
+ ; read 16 rows of 8px each, interleave
+ movh m0, [dst1q+mstrideq*4]
+ movh m1, [dst8q+mstrideq*4]
+ movh m2, [dst1q+mstrideq*2]
+ movh m5, [dst8q+mstrideq*2]
+ movh m3, [dst1q+mstrideq ]
+ movh m6, [dst8q+mstrideq ]
+ movh m4, [dst1q]
+ movh m7, [dst8q]
+ punpcklbw m0, m1 ; A/I
+ punpcklbw m2, m5 ; C/K
+ punpcklbw m3, m6 ; D/L
+ punpcklbw m4, m7 ; E/M
+
+ add dst8q, strideq
+ movh m1, [dst2q+mstrideq*4]
+ movh m6, [dst8q+mstrideq*4]
+ movh m5, [dst2q]
+ movh m7, [dst8q]
+ punpcklbw m1, m6 ; B/J
+ punpcklbw m5, m7 ; F/N
+ movh m6, [dst2q+ strideq ]
+ movh m7, [dst8q+ strideq ]
+ punpcklbw m6, m7 ; G/O
+
+ ; 8x16 transpose
+ TRANSPOSE4x4B 0, 1, 2, 3, 7
+%ifdef m8
+ SWAP 1, 8
+%else
+ mova m_q0backup, m1
+%endif
+ movh m7, [dst2q+ strideq*2]
+ movh m1, [dst8q+ strideq*2]
+ punpcklbw m7, m1 ; H/P
+ TRANSPOSE4x4B 4, 5, 6, 7, 1
+ SBUTTERFLY dq, 0, 4, 1 ; p3/p2
+ SBUTTERFLY dq, 2, 6, 1 ; q0/q1
+ SBUTTERFLY dq, 3, 7, 1 ; q2/q3
+%ifdef m8
+ SWAP 1, 8
+ SWAP 2, 8
+%else
+ mova m1, m_q0backup
+ mova m_q0backup, m2 ; store q0
+%endif
+ SBUTTERFLY dq, 1, 5, 2 ; p1/p0
+%ifdef m12
+ SWAP 5, 12
+%else
+ mova m_p0backup, m5 ; store p0
+%endif
+ SWAP 1, 4
+ SWAP 2, 4
+ SWAP 6, 3
+ SWAP 5, 3
+%endif
+
+ ; normal_limit for p3-p2, p2-p1, q3-q2 and q2-q1
+ mova m4, m1
+ SWAP 4, 1
+ psubusb m4, m0 ; p2-p3
+ psubusb m0, m1 ; p3-p2
+ por m0, m4 ; abs(p3-p2)
+
+ mova m4, m2
+ SWAP 4, 2
+ psubusb m4, m1 ; p1-p2
+ mova m_p2backup, m1
+ psubusb m1, m2 ; p2-p1
+ por m1, m4 ; abs(p2-p1)
+
+ mova m4, m6
+ SWAP 4, 6
+ psubusb m4, m7 ; q2-q3
+ psubusb m7, m6 ; q3-q2
+ por m7, m4 ; abs(q3-q2)
+
+ mova m4, m5
+ SWAP 4, 5
+ psubusb m4, m6 ; q1-q2
+ mova m_q2backup, m6
+ psubusb m6, m5 ; q2-q1
+ por m6, m4 ; abs(q2-q1)
+
+%if notcpuflag(mmxext)
+ mova m4, m_flimI
+ pxor m3, m3
+ psubusb m0, m4
+ psubusb m1, m4
+ psubusb m7, m4
+ psubusb m6, m4
+ pcmpeqb m0, m3 ; abs(p3-p2) <= I
+ pcmpeqb m1, m3 ; abs(p2-p1) <= I
+ pcmpeqb m7, m3 ; abs(q3-q2) <= I
+ pcmpeqb m6, m3 ; abs(q2-q1) <= I
+ pand m0, m1
+ pand m7, m6
+ pand m0, m7
+%else ; mmxext/sse2
+ pmaxub m0, m1
+ pmaxub m6, m7
+ pmaxub m0, m6
+%endif
+
+ ; normal_limit and high_edge_variance for p1-p0, q1-q0
+ SWAP 7, 3 ; now m7 is zero
+%ifidn %1, v
+ movrow m3, [dst1q+mstrideq ] ; p0
+%if mmsize == 16 && %2 == 8
+ movhps m3, [dst8q+mstrideq ]
+%endif
+%elifdef m12
+ SWAP 3, 12
+%else
+ mova m3, m_p0backup
+%endif
+
+ mova m1, m2
+ SWAP 1, 2
+ mova m6, m3
+ SWAP 3, 6
+ psubusb m1, m3 ; p1-p0
+ psubusb m6, m2 ; p0-p1
+ por m1, m6 ; abs(p1-p0)
+%if notcpuflag(mmxext)
+ mova m6, m1
+ psubusb m1, m4
+ psubusb m6, m_hevthr
+ pcmpeqb m1, m7 ; abs(p1-p0) <= I
+ pcmpeqb m6, m7 ; abs(p1-p0) <= hev_thresh
+ pand m0, m1
+ mova m_maskres, m6
+%else ; mmxext/sse2
+ pmaxub m0, m1 ; max_I
+ SWAP 1, 4 ; max_hev_thresh
+%endif
+
+ SWAP 6, 4 ; now m6 is I
+%ifidn %1, v
+ movrow m4, [dst1q] ; q0
+%if mmsize == 16 && %2 == 8
+ movhps m4, [dst8q]
+%endif
+%elifdef m8
+ SWAP 4, 8
+%else
+ mova m4, m_q0backup
+%endif
+ mova m1, m4
+ SWAP 1, 4
+ mova m7, m5
+ SWAP 7, 5
+ psubusb m1, m5 ; q0-q1
+ psubusb m7, m4 ; q1-q0
+ por m1, m7 ; abs(q1-q0)
+%if notcpuflag(mmxext)
+ mova m7, m1
+ psubusb m1, m6
+ psubusb m7, m_hevthr
+ pxor m6, m6
+ pcmpeqb m1, m6 ; abs(q1-q0) <= I
+ pcmpeqb m7, m6 ; abs(q1-q0) <= hev_thresh
+ mova m6, m_maskres
+ pand m0, m1 ; abs([pq][321]-[pq][210]) <= I
+ pand m6, m7
+%else ; mmxext/sse2
+ pxor m7, m7
+ pmaxub m0, m1
+ pmaxub m6, m1
+ psubusb m0, m_flimI
+ psubusb m6, m_hevthr
+ pcmpeqb m0, m7 ; max(abs(..)) <= I
+ pcmpeqb m6, m7 ; !(max(abs..) > thresh)
+%endif
+%ifdef m12
+ SWAP 6, 12
+%else
+ mova m_maskres, m6 ; !(abs(p1-p0) > hev_t || abs(q1-q0) > hev_t)
+%endif
+
+ ; simple_limit
+ mova m1, m3
+ SWAP 1, 3
+ mova m6, m4 ; keep copies of p0/q0 around for later use
+ SWAP 6, 4
+ psubusb m1, m4 ; p0-q0
+ psubusb m6, m3 ; q0-p0
+ por m1, m6 ; abs(q0-p0)
+ paddusb m1, m1 ; m1=2*abs(q0-p0)
+
+ mova m7, m2
+ SWAP 7, 2
+ mova m6, m5
+ SWAP 6, 5
+ psubusb m7, m5 ; p1-q1
+ psubusb m6, m2 ; q1-p1
+ por m7, m6 ; abs(q1-p1)
+ pxor m6, m6
+ pand m7, [pb_FE]
+ psrlq m7, 1 ; abs(q1-p1)/2
+ paddusb m7, m1 ; abs(q0-p0)*2+abs(q1-p1)/2
+ psubusb m7, m_flimE
+ pcmpeqb m7, m6 ; abs(q0-p0)*2+abs(q1-p1)/2 <= E
+ pand m0, m7 ; normal_limit result
+
+ ; filter_common; at this point, m2-m5=p1-q1 and m0 is filter_mask
+%ifdef m8 ; x86-64 && sse2
+ mova m8, [pb_80]
+%define m_pb_80 m8
+%else ; x86-32 or mmx/mmxext
+%define m_pb_80 [pb_80]
+%endif
+ mova m1, m4
+ mova m7, m3
+ pxor m1, m_pb_80
+ pxor m7, m_pb_80
+ psubsb m1, m7 ; (signed) q0-p0
+ mova m6, m2
+ mova m7, m5
+ pxor m6, m_pb_80
+ pxor m7, m_pb_80
+ psubsb m6, m7 ; (signed) p1-q1
+ mova m7, m_maskres
+ paddsb m6, m1
+ paddsb m6, m1
+ paddsb m6, m1
+ pand m6, m0
+%ifdef m8
+ mova m_limres, m6 ; 3*(qp-p0)+(p1-q1) masked for filter_mbedge
+ pand m_limres, m7
+%else
+ mova m0, m6
+ pand m0, m7
+ mova m_limres, m0
+%endif
+ pandn m7, m6 ; 3*(q0-p0)+(p1-q1) masked for filter_common
+
+ mova m1, [pb_F8]
+ mova m6, m7
+ paddsb m7, [pb_3]
+ paddsb m6, [pb_4]
+ pand m7, m1
+ pand m6, m1
+
+ pxor m1, m1
+ pxor m0, m0
+ pcmpgtb m1, m7
+ psubb m0, m7
+ psrlq m7, 3 ; +f2
+ psrlq m0, 3 ; -f2
+ pand m0, m1
+ pandn m1, m7
+ psubusb m3, m0
+ paddusb m3, m1 ; p0+f2
+
+ pxor m1, m1
+ pxor m0, m0
+ pcmpgtb m0, m6
+ psubb m1, m6
+ psrlq m6, 3 ; +f1
+ psrlq m1, 3 ; -f1
+ pand m1, m0
+ pandn m0, m6
+ psubusb m4, m0
+ paddusb m4, m1 ; q0-f1
+
+ ; filter_mbedge (m2-m5 = p1-q1; lim_res carries w)
+%if cpuflag(ssse3)
+ mova m7, [pb_1]
+%else
+ mova m7, [pw_63]
+%endif
+%ifdef m8
+ SWAP 1, 8
+%else
+ mova m1, m_limres
+%endif
+ pxor m0, m0
+ mova m6, m1
+ pcmpgtb m0, m1 ; which are negative
+%if cpuflag(ssse3)
+ punpcklbw m6, m7 ; interleave with "1" for rounding
+ punpckhbw m1, m7
+%else
+ punpcklbw m6, m0 ; signed byte->word
+ punpckhbw m1, m0
+%endif
+ mova m_limsign, m0
+%if cpuflag(ssse3)
+ mova m7, [pb_27_63]
+%ifndef m8
+ mova m_limres, m1
+%endif
+%ifdef m10
+ SWAP 0, 10 ; don't lose lim_sign copy
+%endif
+ mova m0, m7
+ pmaddubsw m7, m6
+ SWAP 6, 7
+ pmaddubsw m0, m1
+ SWAP 1, 0
+%ifdef m10
+ SWAP 0, 10
+%else
+ mova m0, m_limsign
+%endif
+%else
+ mova m_maskres, m6 ; backup for later in filter
+ mova m_limres, m1
+ pmullw m6, [pw_27]
+ pmullw m1, [pw_27]
+ paddw m6, m7
+ paddw m1, m7
+%endif
+ psraw m6, 7
+ psraw m1, 7
+ packsswb m6, m1 ; a0
+ pxor m1, m1
+ psubb m1, m6
+ pand m1, m0 ; -a0
+ pandn m0, m6 ; +a0
+%if cpuflag(ssse3)
+ mova m6, [pb_18_63] ; pipelining
+%endif
+ psubusb m3, m1
+ paddusb m4, m1
+ paddusb m3, m0 ; p0+a0
+ psubusb m4, m0 ; q0-a0
+
+%if cpuflag(ssse3)
+ SWAP 6, 7
+%ifdef m10
+ SWAP 1, 10
+%else
+ mova m1, m_limres
+%endif
+ mova m0, m7
+ pmaddubsw m7, m6
+ SWAP 6, 7
+ pmaddubsw m0, m1
+ SWAP 1, 0
+%ifdef m10
+ SWAP 0, 10
+%endif
+ mova m0, m_limsign
+%else
+ mova m6, m_maskres
+ mova m1, m_limres
+ pmullw m6, [pw_18]
+ pmullw m1, [pw_18]
+ paddw m6, m7
+ paddw m1, m7
+%endif
+ mova m0, m_limsign
+ psraw m6, 7
+ psraw m1, 7
+ packsswb m6, m1 ; a1
+ pxor m1, m1
+ psubb m1, m6
+ pand m1, m0 ; -a1
+ pandn m0, m6 ; +a1
+%if cpuflag(ssse3)
+ mova m6, [pb_9_63]
+%endif
+ psubusb m2, m1
+ paddusb m5, m1
+ paddusb m2, m0 ; p1+a1
+ psubusb m5, m0 ; q1-a1
+
+%if cpuflag(ssse3)
+ SWAP 6, 7
+%ifdef m10
+ SWAP 1, 10
+%else
+ mova m1, m_limres
+%endif
+ mova m0, m7
+ pmaddubsw m7, m6
+ SWAP 6, 7
+ pmaddubsw m0, m1
+ SWAP 1, 0
+%else
+%ifdef m8
+ SWAP 6, 12
+ SWAP 1, 8
+%else
+ mova m6, m_maskres
+ mova m1, m_limres
+%endif
+ pmullw m6, [pw_9]
+ pmullw m1, [pw_9]
+ paddw m6, m7
+ paddw m1, m7
+%endif
+%ifdef m9
+ SWAP 7, 9
+%else
+ mova m7, m_limsign
+%endif
+ psraw m6, 7
+ psraw m1, 7
+ packsswb m6, m1 ; a1
+ pxor m0, m0
+ psubb m0, m6
+ pand m0, m7 ; -a1
+ pandn m7, m6 ; +a1
+%ifdef m8
+ SWAP 1, 13
+ SWAP 6, 14
+%else
+ mova m1, m_p2backup
+ mova m6, m_q2backup
+%endif
+ psubusb m1, m0
+ paddusb m6, m0
+ paddusb m1, m7 ; p1+a1
+ psubusb m6, m7 ; q1-a1
+
+ ; store
+%ifidn %1, v
+ movrow [dst2q+mstrideq*4], m1
+ movrow [dst1q+mstrideq*2], m2
+ movrow [dst1q+mstrideq ], m3
+ movrow [dst1q], m4
+ movrow [dst2q], m5
+ movrow [dst2q+ strideq ], m6
+%if mmsize == 16 && %2 == 8
+ add dst8q, mstrideq
+ movhps [dst8q+mstrideq*2], m1
+ movhps [dst8q+mstrideq ], m2
+ movhps [dst8q], m3
+ add dst8q, strideq
+ movhps [dst8q], m4
+ movhps [dst8q+ strideq ], m5
+ movhps [dst8q+ strideq*2], m6
+%endif
+%else ; h
+ inc dst1q
+ inc dst2q
+
+ ; 4x8/16 transpose
+ TRANSPOSE4x4B 1, 2, 3, 4, 0
+ SBUTTERFLY bw, 5, 6, 0
+
+%if mmsize == 8 ; mmx/mmxext (h)
+ WRITE_4x2D 1, 2, 3, 4, dst1q, dst2q, mstrideq, strideq
+ add dst1q, 4
+ WRITE_2x4W m5, m6, dst2q, dst1q, mstrideq, strideq
+%else ; sse2 (h)
+ lea dst8q, [dst8q+mstrideq+1]
+ WRITE_4x4D 1, 2, 3, 4, dst1q, dst2q, dst8q, mstrideq, strideq, %2
+ lea dst1q, [dst2q+mstrideq+4]
+ lea dst8q, [dst8q+mstrideq+4]
+%if cpuflag(sse4)
+ add dst2q, 4
+%endif
+ WRITE_8W m5, dst2q, dst1q, mstrideq, strideq
+%if cpuflag(sse4)
+ lea dst2q, [dst8q+ strideq ]
+%endif
+ WRITE_8W m6, dst2q, dst8q, mstrideq, strideq
+%endif
+%endif
+
+%if mmsize == 8
+%if %2 == 8 ; chroma
+%ifidn %1, h
+ sub dst1q, 5
+%endif
+ cmp dst1q, dst8q
+ mov dst1q, dst8q
+ jnz .next8px
+%else
+%ifidn %1, h
+ lea dst1q, [dst1q+ strideq*8-5]
+%else ; v
+ add dst1q, 8
+%endif
+ dec cntrq
+ jg .next8px
+%endif
+ REP_RET
+%else ; mmsize == 16
+ RET
+%endif
+%endmacro
+
+%if ARCH_X86_32
+INIT_MMX mmx
+MBEDGE_LOOPFILTER v, 16
+MBEDGE_LOOPFILTER h, 16
+MBEDGE_LOOPFILTER v, 8
+MBEDGE_LOOPFILTER h, 8
+
+INIT_MMX mmxext
+MBEDGE_LOOPFILTER v, 16
+MBEDGE_LOOPFILTER h, 16
+MBEDGE_LOOPFILTER v, 8
+MBEDGE_LOOPFILTER h, 8
+%endif
+
+INIT_XMM sse2
+MBEDGE_LOOPFILTER v, 16
+MBEDGE_LOOPFILTER h, 16
+MBEDGE_LOOPFILTER v, 8
+MBEDGE_LOOPFILTER h, 8
+
+INIT_XMM ssse3
+MBEDGE_LOOPFILTER v, 16
+MBEDGE_LOOPFILTER h, 16
+MBEDGE_LOOPFILTER v, 8
+MBEDGE_LOOPFILTER h, 8
+
+INIT_XMM sse4
+MBEDGE_LOOPFILTER h, 16
+MBEDGE_LOOPFILTER h, 8
diff --git a/media/ffvpx/libavcodec/x86/vp9dsp_init.c b/media/ffvpx/libavcodec/x86/vp9dsp_init.c
new file mode 100644
index 000000000..469a66171
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp9dsp_init.c
@@ -0,0 +1,400 @@
+/*
+ * VP9 SIMD optimizations
+ *
+ * Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/mem.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/vp9dsp.h"
+#include "libavcodec/x86/vp9dsp_init.h"
+
+#if HAVE_YASM
+
+decl_fpel_func(put, 4, , mmx);
+decl_fpel_func(put, 8, , mmx);
+decl_fpel_func(put, 16, , sse);
+decl_fpel_func(put, 32, , sse);
+decl_fpel_func(put, 64, , sse);
+decl_fpel_func(avg, 4, _8, mmxext);
+decl_fpel_func(avg, 8, _8, mmxext);
+decl_fpel_func(avg, 16, _8, sse2);
+decl_fpel_func(avg, 32, _8, sse2);
+decl_fpel_func(avg, 64, _8, sse2);
+decl_fpel_func(put, 32, , avx);
+decl_fpel_func(put, 64, , avx);
+decl_fpel_func(avg, 32, _8, avx2);
+decl_fpel_func(avg, 64, _8, avx2);
+
+decl_mc_funcs(4, mmxext, int16_t, 8, 8);
+decl_mc_funcs(8, sse2, int16_t, 8, 8);
+decl_mc_funcs(4, ssse3, int8_t, 32, 8);
+decl_mc_funcs(8, ssse3, int8_t, 32, 8);
+#if ARCH_X86_64
+decl_mc_funcs(16, ssse3, int8_t, 32, 8);
+decl_mc_funcs(32, avx2, int8_t, 32, 8);
+#endif
+
+mc_rep_funcs(16, 8, 8, sse2, int16_t, 8, 8)
+#if ARCH_X86_32
+mc_rep_funcs(16, 8, 8, ssse3, int8_t, 32, 8)
+#endif
+mc_rep_funcs(32, 16, 16, sse2, int16_t, 8, 8)
+mc_rep_funcs(32, 16, 16, ssse3, int8_t, 32, 8)
+mc_rep_funcs(64, 32, 32, sse2, int16_t, 8, 8)
+mc_rep_funcs(64, 32, 32, ssse3, int8_t, 32, 8)
+#if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
+mc_rep_funcs(64, 32, 32, avx2, int8_t, 32, 8)
+#endif
+
+extern const int8_t ff_filters_ssse3[3][15][4][32];
+extern const int16_t ff_filters_sse2[3][15][8][8];
+
+filters_8tap_2d_fn2(put, 16, 8, 1, mmxext, sse2, sse2)
+filters_8tap_2d_fn2(avg, 16, 8, 1, mmxext, sse2, sse2)
+filters_8tap_2d_fn2(put, 16, 8, 1, ssse3, ssse3, ssse3)
+filters_8tap_2d_fn2(avg, 16, 8, 1, ssse3, ssse3, ssse3)
+#if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
+filters_8tap_2d_fn(put, 64, 32, 8, 1, avx2, ssse3)
+filters_8tap_2d_fn(put, 32, 32, 8, 1, avx2, ssse3)
+filters_8tap_2d_fn(avg, 64, 32, 8, 1, avx2, ssse3)
+filters_8tap_2d_fn(avg, 32, 32, 8, 1, avx2, ssse3)
+#endif
+
+filters_8tap_1d_fn3(put, 8, mmxext, sse2, sse2)
+filters_8tap_1d_fn3(avg, 8, mmxext, sse2, sse2)
+filters_8tap_1d_fn3(put, 8, ssse3, ssse3, ssse3)
+filters_8tap_1d_fn3(avg, 8, ssse3, ssse3, ssse3)
+#if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
+filters_8tap_1d_fn2(put, 64, 8, avx2, ssse3)
+filters_8tap_1d_fn2(put, 32, 8, avx2, ssse3)
+filters_8tap_1d_fn2(avg, 64, 8, avx2, ssse3)
+filters_8tap_1d_fn2(avg, 32, 8, avx2, ssse3)
+#endif
+
+#define itxfm_func(typea, typeb, size, opt) \
+void ff_vp9_##typea##_##typeb##_##size##x##size##_add_##opt(uint8_t *dst, ptrdiff_t stride, \
+ int16_t *block, int eob)
+#define itxfm_funcs(size, opt) \
+itxfm_func(idct, idct, size, opt); \
+itxfm_func(iadst, idct, size, opt); \
+itxfm_func(idct, iadst, size, opt); \
+itxfm_func(iadst, iadst, size, opt)
+
+itxfm_func(idct, idct, 4, mmxext);
+itxfm_func(idct, iadst, 4, sse2);
+itxfm_func(iadst, idct, 4, sse2);
+itxfm_func(iadst, iadst, 4, sse2);
+itxfm_funcs(4, ssse3);
+itxfm_funcs(8, sse2);
+itxfm_funcs(8, ssse3);
+itxfm_funcs(8, avx);
+itxfm_funcs(16, sse2);
+itxfm_funcs(16, ssse3);
+itxfm_funcs(16, avx);
+itxfm_func(idct, idct, 32, sse2);
+itxfm_func(idct, idct, 32, ssse3);
+itxfm_func(idct, idct, 32, avx);
+itxfm_func(iwht, iwht, 4, mmx);
+
+#undef itxfm_func
+#undef itxfm_funcs
+
+#define lpf_funcs(size1, size2, opt) \
+void ff_vp9_loop_filter_v_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stride, \
+ int E, int I, int H); \
+void ff_vp9_loop_filter_h_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stride, \
+ int E, int I, int H)
+
+lpf_funcs(16, 16, sse2);
+lpf_funcs(16, 16, ssse3);
+lpf_funcs(16, 16, avx);
+lpf_funcs(44, 16, sse2);
+lpf_funcs(44, 16, ssse3);
+lpf_funcs(44, 16, avx);
+lpf_funcs(84, 16, sse2);
+lpf_funcs(84, 16, ssse3);
+lpf_funcs(84, 16, avx);
+lpf_funcs(48, 16, sse2);
+lpf_funcs(48, 16, ssse3);
+lpf_funcs(48, 16, avx);
+lpf_funcs(88, 16, sse2);
+lpf_funcs(88, 16, ssse3);
+lpf_funcs(88, 16, avx);
+
+#undef lpf_funcs
+
+#define ipred_func(size, type, opt) \
+void ff_vp9_ipred_##type##_##size##x##size##_##opt(uint8_t *dst, ptrdiff_t stride, \
+ const uint8_t *l, const uint8_t *a)
+
+ipred_func(8, v, mmx);
+
+#define ipred_dc_funcs(size, opt) \
+ipred_func(size, dc, opt); \
+ipred_func(size, dc_left, opt); \
+ipred_func(size, dc_top, opt)
+
+ipred_dc_funcs(4, mmxext);
+ipred_dc_funcs(8, mmxext);
+
+#define ipred_dir_tm_funcs(size, opt) \
+ipred_func(size, tm, opt); \
+ipred_func(size, dl, opt); \
+ipred_func(size, dr, opt); \
+ipred_func(size, hd, opt); \
+ipred_func(size, hu, opt); \
+ipred_func(size, vl, opt); \
+ipred_func(size, vr, opt)
+
+ipred_dir_tm_funcs(4, mmxext);
+
+ipred_func(16, v, sse);
+ipred_func(32, v, sse);
+
+ipred_dc_funcs(16, sse2);
+ipred_dc_funcs(32, sse2);
+
+#define ipred_dir_tm_h_funcs(size, opt) \
+ipred_dir_tm_funcs(size, opt); \
+ipred_func(size, h, opt)
+
+ipred_dir_tm_h_funcs(8, sse2);
+ipred_dir_tm_h_funcs(16, sse2);
+ipred_dir_tm_h_funcs(32, sse2);
+
+ipred_func(4, h, sse2);
+
+#define ipred_all_funcs(size, opt) \
+ipred_dc_funcs(size, opt); \
+ipred_dir_tm_h_funcs(size, opt)
+
+// FIXME hd/vl_4x4_ssse3 does not exist
+ipred_all_funcs(4, ssse3);
+ipred_all_funcs(8, ssse3);
+ipred_all_funcs(16, ssse3);
+ipred_all_funcs(32, ssse3);
+
+ipred_dir_tm_h_funcs(8, avx);
+ipred_dir_tm_h_funcs(16, avx);
+ipred_dir_tm_h_funcs(32, avx);
+
+ipred_func(32, v, avx);
+
+ipred_dc_funcs(32, avx2);
+ipred_func(32, h, avx2);
+ipred_func(32, tm, avx2);
+
+#undef ipred_func
+#undef ipred_dir_tm_h_funcs
+#undef ipred_dir_tm_funcs
+#undef ipred_dc_funcs
+
+#endif /* HAVE_YASM */
+
+av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact)
+{
+#if HAVE_YASM
+ int cpu_flags;
+
+ if (bpp == 10) {
+ ff_vp9dsp_init_10bpp_x86(dsp, bitexact);
+ return;
+ } else if (bpp == 12) {
+ ff_vp9dsp_init_12bpp_x86(dsp, bitexact);
+ return;
+ }
+
+ cpu_flags = av_get_cpu_flags();
+
+#define init_lpf(opt) do { \
+ dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_##opt; \
+ dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \
+ dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \
+ dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \
+ dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \
+ dsp->loop_filter_mix2[0][1][1] = ff_vp9_loop_filter_v_48_16_##opt; \
+ dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \
+ dsp->loop_filter_mix2[1][0][1] = ff_vp9_loop_filter_v_84_16_##opt; \
+ dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_##opt; \
+ dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_##opt; \
+} while (0)
+
+#define init_ipred(sz, opt, t, e) \
+ dsp->intra_pred[TX_##sz##X##sz][e##_PRED] = ff_vp9_ipred_##t##_##sz##x##sz##_##opt
+
+#define ff_vp9_ipred_hd_4x4_ssse3 ff_vp9_ipred_hd_4x4_mmxext
+#define ff_vp9_ipred_vl_4x4_ssse3 ff_vp9_ipred_vl_4x4_mmxext
+#define init_dir_tm_ipred(sz, opt) do { \
+ init_ipred(sz, opt, dl, DIAG_DOWN_LEFT); \
+ init_ipred(sz, opt, dr, DIAG_DOWN_RIGHT); \
+ init_ipred(sz, opt, hd, HOR_DOWN); \
+ init_ipred(sz, opt, vl, VERT_LEFT); \
+ init_ipred(sz, opt, hu, HOR_UP); \
+ init_ipred(sz, opt, tm, TM_VP8); \
+ init_ipred(sz, opt, vr, VERT_RIGHT); \
+} while (0)
+#define init_dir_tm_h_ipred(sz, opt) do { \
+ init_dir_tm_ipred(sz, opt); \
+ init_ipred(sz, opt, h, HOR); \
+} while (0)
+#define init_dc_ipred(sz, opt) do { \
+ init_ipred(sz, opt, dc, DC); \
+ init_ipred(sz, opt, dc_left, LEFT_DC); \
+ init_ipred(sz, opt, dc_top, TOP_DC); \
+} while (0)
+#define init_all_ipred(sz, opt) do { \
+ init_dc_ipred(sz, opt); \
+ init_dir_tm_h_ipred(sz, opt); \
+} while (0)
+
+ if (EXTERNAL_MMX(cpu_flags)) {
+ init_fpel_func(4, 0, 4, put, , mmx);
+ init_fpel_func(3, 0, 8, put, , mmx);
+ if (!bitexact) {
+ dsp->itxfm_add[4 /* lossless */][DCT_DCT] =
+ dsp->itxfm_add[4 /* lossless */][ADST_DCT] =
+ dsp->itxfm_add[4 /* lossless */][DCT_ADST] =
+ dsp->itxfm_add[4 /* lossless */][ADST_ADST] = ff_vp9_iwht_iwht_4x4_add_mmx;
+ }
+ init_ipred(8, mmx, v, VERT);
+ }
+
+ if (EXTERNAL_MMXEXT(cpu_flags)) {
+ init_subpel2(4, 0, 4, put, 8, mmxext);
+ init_subpel2(4, 1, 4, avg, 8, mmxext);
+ init_fpel_func(4, 1, 4, avg, _8, mmxext);
+ init_fpel_func(3, 1, 8, avg, _8, mmxext);
+ dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_mmxext;
+ init_dc_ipred(4, mmxext);
+ init_dc_ipred(8, mmxext);
+ init_dir_tm_ipred(4, mmxext);
+ }
+
+ if (EXTERNAL_SSE(cpu_flags)) {
+ init_fpel_func(2, 0, 16, put, , sse);
+ init_fpel_func(1, 0, 32, put, , sse);
+ init_fpel_func(0, 0, 64, put, , sse);
+ init_ipred(16, sse, v, VERT);
+ init_ipred(32, sse, v, VERT);
+ }
+
+ if (EXTERNAL_SSE2(cpu_flags)) {
+ init_subpel3_8to64(0, put, 8, sse2);
+ init_subpel3_8to64(1, avg, 8, sse2);
+ init_fpel_func(2, 1, 16, avg, _8, sse2);
+ init_fpel_func(1, 1, 32, avg, _8, sse2);
+ init_fpel_func(0, 1, 64, avg, _8, sse2);
+ init_lpf(sse2);
+ dsp->itxfm_add[TX_4X4][ADST_DCT] = ff_vp9_idct_iadst_4x4_add_sse2;
+ dsp->itxfm_add[TX_4X4][DCT_ADST] = ff_vp9_iadst_idct_4x4_add_sse2;
+ dsp->itxfm_add[TX_4X4][ADST_ADST] = ff_vp9_iadst_iadst_4x4_add_sse2;
+ dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_sse2;
+ dsp->itxfm_add[TX_8X8][ADST_DCT] = ff_vp9_idct_iadst_8x8_add_sse2;
+ dsp->itxfm_add[TX_8X8][DCT_ADST] = ff_vp9_iadst_idct_8x8_add_sse2;
+ dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_sse2;
+ dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_sse2;
+ dsp->itxfm_add[TX_16X16][ADST_DCT] = ff_vp9_idct_iadst_16x16_add_sse2;
+ dsp->itxfm_add[TX_16X16][DCT_ADST] = ff_vp9_iadst_idct_16x16_add_sse2;
+ dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_sse2;
+ dsp->itxfm_add[TX_32X32][ADST_ADST] =
+ dsp->itxfm_add[TX_32X32][ADST_DCT] =
+ dsp->itxfm_add[TX_32X32][DCT_ADST] =
+ dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_sse2;
+ init_dc_ipred(16, sse2);
+ init_dc_ipred(32, sse2);
+ init_dir_tm_h_ipred(8, sse2);
+ init_dir_tm_h_ipred(16, sse2);
+ init_dir_tm_h_ipred(32, sse2);
+ init_ipred(4, sse2, h, HOR);
+ }
+
+ if (EXTERNAL_SSSE3(cpu_flags)) {
+ init_subpel3(0, put, 8, ssse3);
+ init_subpel3(1, avg, 8, ssse3);
+ dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_ssse3;
+ dsp->itxfm_add[TX_4X4][ADST_DCT] = ff_vp9_idct_iadst_4x4_add_ssse3;
+ dsp->itxfm_add[TX_4X4][DCT_ADST] = ff_vp9_iadst_idct_4x4_add_ssse3;
+ dsp->itxfm_add[TX_4X4][ADST_ADST] = ff_vp9_iadst_iadst_4x4_add_ssse3;
+ dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_ssse3;
+ dsp->itxfm_add[TX_8X8][ADST_DCT] = ff_vp9_idct_iadst_8x8_add_ssse3;
+ dsp->itxfm_add[TX_8X8][DCT_ADST] = ff_vp9_iadst_idct_8x8_add_ssse3;
+ dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_ssse3;
+ dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_ssse3;
+ dsp->itxfm_add[TX_16X16][ADST_DCT] = ff_vp9_idct_iadst_16x16_add_ssse3;
+ dsp->itxfm_add[TX_16X16][DCT_ADST] = ff_vp9_iadst_idct_16x16_add_ssse3;
+ dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_ssse3;
+ dsp->itxfm_add[TX_32X32][ADST_ADST] =
+ dsp->itxfm_add[TX_32X32][ADST_DCT] =
+ dsp->itxfm_add[TX_32X32][DCT_ADST] =
+ dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_ssse3;
+ init_lpf(ssse3);
+ init_all_ipred(4, ssse3);
+ init_all_ipred(8, ssse3);
+ init_all_ipred(16, ssse3);
+ init_all_ipred(32, ssse3);
+ }
+
+ if (EXTERNAL_AVX(cpu_flags)) {
+ dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_avx;
+ dsp->itxfm_add[TX_8X8][ADST_DCT] = ff_vp9_idct_iadst_8x8_add_avx;
+ dsp->itxfm_add[TX_8X8][DCT_ADST] = ff_vp9_iadst_idct_8x8_add_avx;
+ dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_avx;
+ dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_avx;
+ dsp->itxfm_add[TX_16X16][ADST_DCT] = ff_vp9_idct_iadst_16x16_add_avx;
+ dsp->itxfm_add[TX_16X16][DCT_ADST] = ff_vp9_iadst_idct_16x16_add_avx;
+ dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_avx;
+ dsp->itxfm_add[TX_32X32][ADST_ADST] =
+ dsp->itxfm_add[TX_32X32][ADST_DCT] =
+ dsp->itxfm_add[TX_32X32][DCT_ADST] =
+ dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx;
+ init_lpf(avx);
+ init_dir_tm_h_ipred(8, avx);
+ init_dir_tm_h_ipred(16, avx);
+ init_dir_tm_h_ipred(32, avx);
+ }
+ if (EXTERNAL_AVX_FAST(cpu_flags)) {
+ init_fpel_func(1, 0, 32, put, , avx);
+ init_fpel_func(0, 0, 64, put, , avx);
+ init_ipred(32, avx, v, VERT);
+ }
+
+ if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+ init_fpel_func(1, 1, 32, avg, _8, avx2);
+ init_fpel_func(0, 1, 64, avg, _8, avx2);
+ if (ARCH_X86_64) {
+#if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
+ init_subpel3_32_64(0, put, 8, avx2);
+ init_subpel3_32_64(1, avg, 8, avx2);
+#endif
+ }
+ init_dc_ipred(32, avx2);
+ init_ipred(32, avx2, h, HOR);
+ init_ipred(32, avx2, tm, TM_VP8);
+ }
+
+#undef init_fpel
+#undef init_subpel1
+#undef init_subpel2
+#undef init_subpel3
+
+#endif /* HAVE_YASM */
+}
diff --git a/media/ffvpx/libavcodec/x86/vp9dsp_init.h b/media/ffvpx/libavcodec/x86/vp9dsp_init.h
new file mode 100644
index 000000000..e410cab3a
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp9dsp_init.h
@@ -0,0 +1,189 @@
+/*
+ * VP9 SIMD optimizations
+ *
+ * Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_X86_VP9DSP_INIT_H
+#define AVCODEC_X86_VP9DSP_INIT_H
+
+#include "libavcodec/vp9dsp.h"
+
+// hack to force-expand BPC
+#define cat(a, bpp, b) a##bpp##b
+
+#define decl_fpel_func(avg, sz, bpp, opt) \
+void ff_vp9_##avg##sz##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
+ const uint8_t *src, ptrdiff_t src_stride, \
+ int h, int mx, int my)
+
+#define decl_mc_func(avg, sz, dir, opt, type, f_sz, bpp) \
+void ff_vp9_##avg##_8tap_1d_##dir##_##sz##_##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
+ const uint8_t *src, ptrdiff_t src_stride, \
+ int h, const type (*filter)[f_sz])
+
+#define decl_mc_funcs(sz, opt, type, fsz, bpp) \
+decl_mc_func(put, sz, h, opt, type, fsz, bpp); \
+decl_mc_func(avg, sz, h, opt, type, fsz, bpp); \
+decl_mc_func(put, sz, v, opt, type, fsz, bpp); \
+decl_mc_func(avg, sz, v, opt, type, fsz, bpp)
+
+#define decl_ipred_fn(type, sz, bpp, opt) \
+void ff_vp9_ipred_##type##_##sz##x##sz##_##bpp##_##opt(uint8_t *dst, \
+ ptrdiff_t stride, \
+ const uint8_t *l, \
+ const uint8_t *a)
+
+#define decl_ipred_fns(type, bpp, opt4, opt8_16_32) \
+decl_ipred_fn(type, 4, bpp, opt4); \
+decl_ipred_fn(type, 8, bpp, opt8_16_32); \
+decl_ipred_fn(type, 16, bpp, opt8_16_32); \
+decl_ipred_fn(type, 32, bpp, opt8_16_32)
+
+#define decl_itxfm_func(typea, typeb, size, bpp, opt) \
+void cat(ff_vp9_##typea##_##typeb##_##size##x##size##_add_, bpp, _##opt)(uint8_t *dst, \
+ ptrdiff_t stride, \
+ int16_t *block, \
+ int eob)
+
+#define decl_itxfm_funcs(size, bpp, opt) \
+decl_itxfm_func(idct, idct, size, bpp, opt); \
+decl_itxfm_func(iadst, idct, size, bpp, opt); \
+decl_itxfm_func(idct, iadst, size, bpp, opt); \
+decl_itxfm_func(iadst, iadst, size, bpp, opt)
+
+#define mc_rep_func(avg, sz, hsz, hszb, dir, opt, type, f_sz, bpp) \
+static av_always_inline void \
+ff_vp9_##avg##_8tap_1d_##dir##_##sz##_##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
+ const uint8_t *src, ptrdiff_t src_stride, \
+ int h, const type (*filter)[f_sz]) \
+{ \
+ ff_vp9_##avg##_8tap_1d_##dir##_##hsz##_##bpp##_##opt(dst, dst_stride, src, \
+ src_stride, h, filter); \
+ ff_vp9_##avg##_8tap_1d_##dir##_##hsz##_##bpp##_##opt(dst + hszb, dst_stride, src + hszb, \
+ src_stride, h, filter); \
+}
+
+#define mc_rep_funcs(sz, hsz, hszb, opt, type, fsz, bpp) \
+mc_rep_func(put, sz, hsz, hszb, h, opt, type, fsz, bpp) \
+mc_rep_func(avg, sz, hsz, hszb, h, opt, type, fsz, bpp) \
+mc_rep_func(put, sz, hsz, hszb, v, opt, type, fsz, bpp) \
+mc_rep_func(avg, sz, hsz, hszb, v, opt, type, fsz, bpp)
+
+#define filter_8tap_1d_fn(op, sz, f, f_opt, fname, dir, dvar, bpp, opt) \
+static void op##_8tap_##fname##_##sz##dir##_##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
+ const uint8_t *src, ptrdiff_t src_stride, \
+ int h, int mx, int my) \
+{ \
+ ff_vp9_##op##_8tap_1d_##dir##_##sz##_##bpp##_##opt(dst, dst_stride, src, src_stride, \
+ h, ff_filters_##f_opt[f][dvar - 1]); \
+}
+
+#define filters_8tap_1d_fn(op, sz, dir, dvar, bpp, opt, f_opt) \
+filter_8tap_1d_fn(op, sz, FILTER_8TAP_REGULAR, f_opt, regular, dir, dvar, bpp, opt) \
+filter_8tap_1d_fn(op, sz, FILTER_8TAP_SHARP, f_opt, sharp, dir, dvar, bpp, opt) \
+filter_8tap_1d_fn(op, sz, FILTER_8TAP_SMOOTH, f_opt, smooth, dir, dvar, bpp, opt)
+
+#define filters_8tap_1d_fn2(op, sz, bpp, opt, f_opt) \
+filters_8tap_1d_fn(op, sz, h, mx, bpp, opt, f_opt) \
+filters_8tap_1d_fn(op, sz, v, my, bpp, opt, f_opt)
+
+#define filters_8tap_1d_fn3(op, bpp, opt4, opt8, f_opt) \
+filters_8tap_1d_fn2(op, 64, bpp, opt8, f_opt) \
+filters_8tap_1d_fn2(op, 32, bpp, opt8, f_opt) \
+filters_8tap_1d_fn2(op, 16, bpp, opt8, f_opt) \
+filters_8tap_1d_fn2(op, 8, bpp, opt8, f_opt) \
+filters_8tap_1d_fn2(op, 4, bpp, opt4, f_opt)
+
+#define filter_8tap_2d_fn(op, sz, f, f_opt, fname, align, bpp, bytes, opt) \
+static void op##_8tap_##fname##_##sz##hv_##bpp##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
+ const uint8_t *src, ptrdiff_t src_stride, \
+ int h, int mx, int my) \
+{ \
+ LOCAL_ALIGNED_##align(uint8_t, temp, [71 * 64 * bytes]); \
+ ff_vp9_put_8tap_1d_h_##sz##_##bpp##_##opt(temp, 64 * bytes, src - 3 * src_stride, \
+ src_stride, h + 7, \
+ ff_filters_##f_opt[f][mx - 1]); \
+ ff_vp9_##op##_8tap_1d_v_##sz##_##bpp##_##opt(dst, dst_stride, temp + 3 * bytes * 64, \
+ 64 * bytes, h, \
+ ff_filters_##f_opt[f][my - 1]); \
+}
+
+#define filters_8tap_2d_fn(op, sz, align, bpp, bytes, opt, f_opt) \
+filter_8tap_2d_fn(op, sz, FILTER_8TAP_REGULAR, f_opt, regular, align, bpp, bytes, opt) \
+filter_8tap_2d_fn(op, sz, FILTER_8TAP_SHARP, f_opt, sharp, align, bpp, bytes, opt) \
+filter_8tap_2d_fn(op, sz, FILTER_8TAP_SMOOTH, f_opt, smooth, align, bpp, bytes, opt)
+
+#define filters_8tap_2d_fn2(op, align, bpp, bytes, opt4, opt8, f_opt) \
+filters_8tap_2d_fn(op, 64, align, bpp, bytes, opt8, f_opt) \
+filters_8tap_2d_fn(op, 32, align, bpp, bytes, opt8, f_opt) \
+filters_8tap_2d_fn(op, 16, align, bpp, bytes, opt8, f_opt) \
+filters_8tap_2d_fn(op, 8, align, bpp, bytes, opt8, f_opt) \
+filters_8tap_2d_fn(op, 4, align, bpp, bytes, opt4, f_opt)
+
+#define init_fpel_func(idx1, idx2, sz, type, bpp, opt) \
+ dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = \
+ dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = \
+ dsp->mc[idx1][FILTER_8TAP_SHARP ][idx2][0][0] = \
+ dsp->mc[idx1][FILTER_BILINEAR ][idx2][0][0] = ff_vp9_##type##sz##bpp##_##opt
+
+#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type, bpp, opt) \
+ dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][idxh][idxv] = \
+ type##_8tap_smooth_##sz##dir##_##bpp##_##opt; \
+ dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = \
+ type##_8tap_regular_##sz##dir##_##bpp##_##opt; \
+ dsp->mc[idx1][FILTER_8TAP_SHARP ][idx2][idxh][idxv] = \
+ type##_8tap_sharp_##sz##dir##_##bpp##_##opt
+
+#define init_subpel2(idx1, idx2, sz, type, bpp, opt) \
+ init_subpel1(idx1, idx2, 1, 1, sz, hv, type, bpp, opt); \
+ init_subpel1(idx1, idx2, 0, 1, sz, v, type, bpp, opt); \
+ init_subpel1(idx1, idx2, 1, 0, sz, h, type, bpp, opt)
+
+#define init_subpel3_32_64(idx, type, bpp, opt) \
+ init_subpel2(0, idx, 64, type, bpp, opt); \
+ init_subpel2(1, idx, 32, type, bpp, opt)
+
+#define init_subpel3_8to64(idx, type, bpp, opt) \
+ init_subpel3_32_64(idx, type, bpp, opt); \
+ init_subpel2(2, idx, 16, type, bpp, opt); \
+ init_subpel2(3, idx, 8, type, bpp, opt)
+
+#define init_subpel3(idx, type, bpp, opt) \
+ init_subpel3_8to64(idx, type, bpp, opt); \
+ init_subpel2(4, idx, 4, type, bpp, opt)
+
+#define init_ipred_func(type, enum, sz, bpp, opt) \
+ dsp->intra_pred[TX_##sz##X##sz][enum##_PRED] = \
+ cat(ff_vp9_ipred_##type##_##sz##x##sz##_, bpp, _##opt)
+
+#define init_8_16_32_ipred_funcs(type, enum, bpp, opt) \
+ init_ipred_func(type, enum, 8, bpp, opt); \
+ init_ipred_func(type, enum, 16, bpp, opt); \
+ init_ipred_func(type, enum, 32, bpp, opt)
+
+#define init_ipred_funcs(type, enum, bpp, opt) \
+ init_ipred_func(type, enum, 4, bpp, opt); \
+ init_8_16_32_ipred_funcs(type, enum, bpp, opt)
+
+void ff_vp9dsp_init_10bpp_x86(VP9DSPContext *dsp, int bitexact);
+void ff_vp9dsp_init_12bpp_x86(VP9DSPContext *dsp, int bitexact);
+void ff_vp9dsp_init_16bpp_x86(VP9DSPContext *dsp);
+
+#endif /* AVCODEC_X86_VP9DSP_INIT_H */
diff --git a/media/ffvpx/libavcodec/x86/vp9dsp_init_10bpp.c b/media/ffvpx/libavcodec/x86/vp9dsp_init_10bpp.c
new file mode 100644
index 000000000..2694c06cb
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp9dsp_init_10bpp.c
@@ -0,0 +1,25 @@
+/*
+ * VP9 SIMD optimizations
+ *
+ * Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define BPC 10
+#define INIT_FUNC ff_vp9dsp_init_10bpp_x86
+#include "vp9dsp_init_16bpp_template.c"
diff --git a/media/ffvpx/libavcodec/x86/vp9dsp_init_12bpp.c b/media/ffvpx/libavcodec/x86/vp9dsp_init_12bpp.c
new file mode 100644
index 000000000..5da3bc184
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp9dsp_init_12bpp.c
@@ -0,0 +1,25 @@
+/*
+ * VP9 SIMD optimizations
+ *
+ * Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define BPC 12
+#define INIT_FUNC ff_vp9dsp_init_12bpp_x86
+#include "vp9dsp_init_16bpp_template.c"
diff --git a/media/ffvpx/libavcodec/x86/vp9dsp_init_16bpp.c b/media/ffvpx/libavcodec/x86/vp9dsp_init_16bpp.c
new file mode 100644
index 000000000..eb67499c9
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp9dsp_init_16bpp.c
@@ -0,0 +1,139 @@
+/*
+ * VP9 SIMD optimizations
+ *
+ * Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/mem.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/vp9dsp.h"
+#include "libavcodec/x86/vp9dsp_init.h"
+
+#if HAVE_YASM
+
+decl_fpel_func(put, 8, , mmx);
+decl_fpel_func(avg, 8, _16, mmxext);
+decl_fpel_func(put, 16, , sse);
+decl_fpel_func(put, 32, , sse);
+decl_fpel_func(put, 64, , sse);
+decl_fpel_func(put, 128, , sse);
+decl_fpel_func(avg, 16, _16, sse2);
+decl_fpel_func(avg, 32, _16, sse2);
+decl_fpel_func(avg, 64, _16, sse2);
+decl_fpel_func(avg, 128, _16, sse2);
+decl_fpel_func(put, 32, , avx);
+decl_fpel_func(put, 64, , avx);
+decl_fpel_func(put, 128, , avx);
+decl_fpel_func(avg, 32, _16, avx2);
+decl_fpel_func(avg, 64, _16, avx2);
+decl_fpel_func(avg, 128, _16, avx2);
+
+decl_ipred_fns(v, 16, mmx, sse);
+decl_ipred_fns(h, 16, mmxext, sse2);
+decl_ipred_fns(dc, 16, mmxext, sse2);
+decl_ipred_fns(dc_top, 16, mmxext, sse2);
+decl_ipred_fns(dc_left, 16, mmxext, sse2);
+
+#define decl_ipred_dir_funcs(type) \
+decl_ipred_fns(type, 16, sse2, sse2); \
+decl_ipred_fns(type, 16, ssse3, ssse3); \
+decl_ipred_fns(type, 16, avx, avx)
+
+decl_ipred_dir_funcs(dl);
+decl_ipred_dir_funcs(dr);
+decl_ipred_dir_funcs(vl);
+decl_ipred_dir_funcs(vr);
+decl_ipred_dir_funcs(hu);
+decl_ipred_dir_funcs(hd);
+#endif /* HAVE_YASM */
+
+av_cold void ff_vp9dsp_init_16bpp_x86(VP9DSPContext *dsp)
+{
+#if HAVE_YASM
+ int cpu_flags = av_get_cpu_flags();
+
+ if (EXTERNAL_MMX(cpu_flags)) {
+ init_fpel_func(4, 0, 8, put, , mmx);
+ init_ipred_func(v, VERT, 4, 16, mmx);
+ }
+
+ if (EXTERNAL_MMXEXT(cpu_flags)) {
+ init_fpel_func(4, 1, 8, avg, _16, mmxext);
+ init_ipred_func(h, HOR, 4, 16, mmxext);
+ init_ipred_func(dc, DC, 4, 16, mmxext);
+ init_ipred_func(dc_top, TOP_DC, 4, 16, mmxext);
+ init_ipred_func(dc_left, LEFT_DC, 4, 16, mmxext);
+ }
+
+ if (EXTERNAL_SSE(cpu_flags)) {
+ init_fpel_func(3, 0, 16, put, , sse);
+ init_fpel_func(2, 0, 32, put, , sse);
+ init_fpel_func(1, 0, 64, put, , sse);
+ init_fpel_func(0, 0, 128, put, , sse);
+ init_8_16_32_ipred_funcs(v, VERT, 16, sse);
+ }
+
+ if (EXTERNAL_SSE2(cpu_flags)) {
+ init_fpel_func(3, 1, 16, avg, _16, sse2);
+ init_fpel_func(2, 1, 32, avg, _16, sse2);
+ init_fpel_func(1, 1, 64, avg, _16, sse2);
+ init_fpel_func(0, 1, 128, avg, _16, sse2);
+ init_8_16_32_ipred_funcs(h, HOR, 16, sse2);
+ init_8_16_32_ipred_funcs(dc, DC, 16, sse2);
+ init_8_16_32_ipred_funcs(dc_top, TOP_DC, 16, sse2);
+ init_8_16_32_ipred_funcs(dc_left, LEFT_DC, 16, sse2);
+ init_ipred_funcs(dl, DIAG_DOWN_LEFT, 16, sse2);
+ init_ipred_funcs(dr, DIAG_DOWN_RIGHT, 16, sse2);
+ init_ipred_funcs(vl, VERT_LEFT, 16, sse2);
+ init_ipred_funcs(vr, VERT_RIGHT, 16, sse2);
+ init_ipred_funcs(hu, HOR_UP, 16, sse2);
+ init_ipred_funcs(hd, HOR_DOWN, 16, sse2);
+ }
+
+ if (EXTERNAL_SSSE3(cpu_flags)) {
+ init_ipred_funcs(dl, DIAG_DOWN_LEFT, 16, ssse3);
+ init_ipred_funcs(dr, DIAG_DOWN_RIGHT, 16, ssse3);
+ init_ipred_funcs(vl, VERT_LEFT, 16, ssse3);
+ init_ipred_funcs(vr, VERT_RIGHT, 16, ssse3);
+ init_ipred_funcs(hu, HOR_UP, 16, ssse3);
+ init_ipred_funcs(hd, HOR_DOWN, 16, ssse3);
+ }
+
+ if (EXTERNAL_AVX_FAST(cpu_flags)) {
+ init_fpel_func(2, 0, 32, put, , avx);
+ init_fpel_func(1, 0, 64, put, , avx);
+ init_fpel_func(0, 0, 128, put, , avx);
+ init_ipred_funcs(dl, DIAG_DOWN_LEFT, 16, avx);
+ init_ipred_funcs(dr, DIAG_DOWN_RIGHT, 16, avx);
+ init_ipred_funcs(vl, VERT_LEFT, 16, avx);
+ init_ipred_funcs(vr, VERT_RIGHT, 16, avx);
+ init_ipred_funcs(hu, HOR_UP, 16, avx);
+ init_ipred_funcs(hd, HOR_DOWN, 16, avx);
+ }
+
+ if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+ init_fpel_func(2, 1, 32, avg, _16, avx2);
+ init_fpel_func(1, 1, 64, avg, _16, avx2);
+ init_fpel_func(0, 1, 128, avg, _16, avx2);
+ }
+
+#endif /* HAVE_YASM */
+}
diff --git a/media/ffvpx/libavcodec/x86/vp9dsp_init_16bpp_template.c b/media/ffvpx/libavcodec/x86/vp9dsp_init_16bpp_template.c
new file mode 100644
index 000000000..4840b2844
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp9dsp_init_16bpp_template.c
@@ -0,0 +1,240 @@
+/*
+ * VP9 SIMD optimizations
+ *
+ * Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/mem.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/vp9dsp.h"
+#include "libavcodec/x86/vp9dsp_init.h"
+
+#if HAVE_YASM
+
+extern const int16_t ff_filters_16bpp[3][15][4][16];
+
+decl_mc_funcs(4, sse2, int16_t, 16, BPC);
+decl_mc_funcs(8, sse2, int16_t, 16, BPC);
+decl_mc_funcs(16, avx2, int16_t, 16, BPC);
+
+mc_rep_funcs(16, 8, 16, sse2, int16_t, 16, BPC)
+mc_rep_funcs(32, 16, 32, sse2, int16_t, 16, BPC)
+mc_rep_funcs(64, 32, 64, sse2, int16_t, 16, BPC)
+#if HAVE_AVX2_EXTERNAL
+mc_rep_funcs(32, 16, 32, avx2, int16_t, 16, BPC)
+mc_rep_funcs(64, 32, 64, avx2, int16_t, 16, BPC)
+#endif
+
+filters_8tap_2d_fn2(put, 16, BPC, 2, sse2, sse2, 16bpp)
+filters_8tap_2d_fn2(avg, 16, BPC, 2, sse2, sse2, 16bpp)
+#if HAVE_AVX2_EXTERNAL
+filters_8tap_2d_fn(put, 64, 32, BPC, 2, avx2, 16bpp)
+filters_8tap_2d_fn(avg, 64, 32, BPC, 2, avx2, 16bpp)
+filters_8tap_2d_fn(put, 32, 32, BPC, 2, avx2, 16bpp)
+filters_8tap_2d_fn(avg, 32, 32, BPC, 2, avx2, 16bpp)
+filters_8tap_2d_fn(put, 16, 32, BPC, 2, avx2, 16bpp)
+filters_8tap_2d_fn(avg, 16, 32, BPC, 2, avx2, 16bpp)
+#endif
+
+filters_8tap_1d_fn3(put, BPC, sse2, sse2, 16bpp)
+filters_8tap_1d_fn3(avg, BPC, sse2, sse2, 16bpp)
+#if HAVE_AVX2_EXTERNAL
+filters_8tap_1d_fn2(put, 64, BPC, avx2, 16bpp)
+filters_8tap_1d_fn2(avg, 64, BPC, avx2, 16bpp)
+filters_8tap_1d_fn2(put, 32, BPC, avx2, 16bpp)
+filters_8tap_1d_fn2(avg, 32, BPC, avx2, 16bpp)
+filters_8tap_1d_fn2(put, 16, BPC, avx2, 16bpp)
+filters_8tap_1d_fn2(avg, 16, BPC, avx2, 16bpp)
+#endif
+
+#define decl_lpf_func(dir, wd, bpp, opt) \
+void ff_vp9_loop_filter_##dir##_##wd##_##bpp##_##opt(uint8_t *dst, ptrdiff_t stride, \
+ int E, int I, int H)
+
+#define decl_lpf_funcs(dir, wd, bpp) \
+decl_lpf_func(dir, wd, bpp, sse2); \
+decl_lpf_func(dir, wd, bpp, ssse3); \
+decl_lpf_func(dir, wd, bpp, avx)
+
+#define decl_lpf_funcs_wd(dir) \
+decl_lpf_funcs(dir, 4, BPC); \
+decl_lpf_funcs(dir, 8, BPC); \
+decl_lpf_funcs(dir, 16, BPC)
+
+decl_lpf_funcs_wd(h);
+decl_lpf_funcs_wd(v);
+
+#define lpf_16_wrapper(dir, off, bpp, opt) \
+static void loop_filter_##dir##_16_##bpp##_##opt(uint8_t *dst, ptrdiff_t stride, \
+ int E, int I, int H) \
+{ \
+ ff_vp9_loop_filter_##dir##_16_##bpp##_##opt(dst, stride, E, I, H); \
+ ff_vp9_loop_filter_##dir##_16_##bpp##_##opt(dst + off, stride, E, I, H); \
+}
+
+#define lpf_16_wrappers(bpp, opt) \
+lpf_16_wrapper(h, 8 * stride, bpp, opt) \
+lpf_16_wrapper(v, 16, bpp, opt)
+
+lpf_16_wrappers(BPC, sse2)
+lpf_16_wrappers(BPC, ssse3)
+lpf_16_wrappers(BPC, avx)
+
+#define lpf_mix2_wrapper(dir, off, wd1, wd2, bpp, opt) \
+static void loop_filter_##dir##_##wd1##wd2##_##bpp##_##opt(uint8_t *dst, ptrdiff_t stride, \
+ int E, int I, int H) \
+{ \
+ ff_vp9_loop_filter_##dir##_##wd1##_##bpp##_##opt(dst, stride, \
+ E & 0xff, I & 0xff, H & 0xff); \
+ ff_vp9_loop_filter_##dir##_##wd2##_##bpp##_##opt(dst + off, stride, \
+ E >> 8, I >> 8, H >> 8); \
+}
+
+#define lpf_mix2_wrappers(wd1, wd2, bpp, opt) \
+lpf_mix2_wrapper(h, 8 * stride, wd1, wd2, bpp, opt) \
+lpf_mix2_wrapper(v, 16, wd1, wd2, bpp, opt)
+
+#define lpf_mix2_wrappers_set(bpp, opt) \
+lpf_mix2_wrappers(4, 4, bpp, opt) \
+lpf_mix2_wrappers(4, 8, bpp, opt) \
+lpf_mix2_wrappers(8, 4, bpp, opt) \
+lpf_mix2_wrappers(8, 8, bpp, opt) \
+
+lpf_mix2_wrappers_set(BPC, sse2)
+lpf_mix2_wrappers_set(BPC, ssse3)
+lpf_mix2_wrappers_set(BPC, avx)
+
+decl_ipred_fns(tm, BPC, mmxext, sse2);
+
+decl_itxfm_func(iwht, iwht, 4, BPC, mmxext);
+#if BPC == 10
+decl_itxfm_func(idct, idct, 4, BPC, mmxext);
+decl_itxfm_funcs(4, BPC, ssse3);
+#else
+decl_itxfm_func(idct, idct, 4, BPC, sse2);
+#endif
+decl_itxfm_func(idct, iadst, 4, BPC, sse2);
+decl_itxfm_func(iadst, idct, 4, BPC, sse2);
+decl_itxfm_func(iadst, iadst, 4, BPC, sse2);
+decl_itxfm_funcs(8, BPC, sse2);
+decl_itxfm_funcs(16, BPC, sse2);
+decl_itxfm_func(idct, idct, 32, BPC, sse2);
+#endif /* HAVE_YASM */
+
+av_cold void INIT_FUNC(VP9DSPContext *dsp, int bitexact)
+{
+#if HAVE_YASM
+ int cpu_flags = av_get_cpu_flags();
+
+#define init_lpf_8_func(idx1, idx2, dir, wd, bpp, opt) \
+ dsp->loop_filter_8[idx1][idx2] = ff_vp9_loop_filter_##dir##_##wd##_##bpp##_##opt
+#define init_lpf_16_func(idx, dir, bpp, opt) \
+ dsp->loop_filter_16[idx] = loop_filter_##dir##_16_##bpp##_##opt
+#define init_lpf_mix2_func(idx1, idx2, idx3, dir, wd1, wd2, bpp, opt) \
+ dsp->loop_filter_mix2[idx1][idx2][idx3] = loop_filter_##dir##_##wd1##wd2##_##bpp##_##opt
+
+#define init_lpf_funcs(bpp, opt) \
+ init_lpf_8_func(0, 0, h, 4, bpp, opt); \
+ init_lpf_8_func(0, 1, v, 4, bpp, opt); \
+ init_lpf_8_func(1, 0, h, 8, bpp, opt); \
+ init_lpf_8_func(1, 1, v, 8, bpp, opt); \
+ init_lpf_8_func(2, 0, h, 16, bpp, opt); \
+ init_lpf_8_func(2, 1, v, 16, bpp, opt); \
+ init_lpf_16_func(0, h, bpp, opt); \
+ init_lpf_16_func(1, v, bpp, opt); \
+ init_lpf_mix2_func(0, 0, 0, h, 4, 4, bpp, opt); \
+ init_lpf_mix2_func(0, 1, 0, h, 4, 8, bpp, opt); \
+ init_lpf_mix2_func(1, 0, 0, h, 8, 4, bpp, opt); \
+ init_lpf_mix2_func(1, 1, 0, h, 8, 8, bpp, opt); \
+ init_lpf_mix2_func(0, 0, 1, v, 4, 4, bpp, opt); \
+ init_lpf_mix2_func(0, 1, 1, v, 4, 8, bpp, opt); \
+ init_lpf_mix2_func(1, 0, 1, v, 8, 4, bpp, opt); \
+ init_lpf_mix2_func(1, 1, 1, v, 8, 8, bpp, opt)
+
+#define init_itx_func(idxa, idxb, typea, typeb, size, bpp, opt) \
+ dsp->itxfm_add[idxa][idxb] = \
+ cat(ff_vp9_##typea##_##typeb##_##size##x##size##_add_, bpp, _##opt);
+#define init_itx_func_one(idx, typea, typeb, size, bpp, opt) \
+ init_itx_func(idx, DCT_DCT, typea, typeb, size, bpp, opt); \
+ init_itx_func(idx, ADST_DCT, typea, typeb, size, bpp, opt); \
+ init_itx_func(idx, DCT_ADST, typea, typeb, size, bpp, opt); \
+ init_itx_func(idx, ADST_ADST, typea, typeb, size, bpp, opt)
+#define init_itx_funcs(idx, size, bpp, opt) \
+ init_itx_func(idx, DCT_DCT, idct, idct, size, bpp, opt); \
+ init_itx_func(idx, ADST_DCT, idct, iadst, size, bpp, opt); \
+ init_itx_func(idx, DCT_ADST, iadst, idct, size, bpp, opt); \
+ init_itx_func(idx, ADST_ADST, iadst, iadst, size, bpp, opt); \
+
+ if (EXTERNAL_MMXEXT(cpu_flags)) {
+ init_ipred_func(tm, TM_VP8, 4, BPC, mmxext);
+ if (!bitexact) {
+ init_itx_func_one(4 /* lossless */, iwht, iwht, 4, BPC, mmxext);
+#if BPC == 10
+ init_itx_func(TX_4X4, DCT_DCT, idct, idct, 4, 10, mmxext);
+#endif
+ }
+ }
+
+ if (EXTERNAL_SSE2(cpu_flags)) {
+ init_subpel3(0, put, BPC, sse2);
+ init_subpel3(1, avg, BPC, sse2);
+ init_lpf_funcs(BPC, sse2);
+ init_8_16_32_ipred_funcs(tm, TM_VP8, BPC, sse2);
+#if BPC == 10
+ if (!bitexact) {
+ init_itx_func(TX_4X4, ADST_DCT, idct, iadst, 4, 10, sse2);
+ init_itx_func(TX_4X4, DCT_ADST, iadst, idct, 4, 10, sse2);
+ init_itx_func(TX_4X4, ADST_ADST, iadst, iadst, 4, 10, sse2);
+ }
+#else
+ init_itx_funcs(TX_4X4, 4, 12, sse2);
+#endif
+ init_itx_funcs(TX_8X8, 8, BPC, sse2);
+ init_itx_funcs(TX_16X16, 16, BPC, sse2);
+ init_itx_func_one(TX_32X32, idct, idct, 32, BPC, sse2);
+ }
+
+ if (EXTERNAL_SSSE3(cpu_flags)) {
+ init_lpf_funcs(BPC, ssse3);
+#if BPC == 10
+ if (!bitexact) {
+ init_itx_funcs(TX_4X4, 4, BPC, ssse3);
+ }
+#endif
+ }
+
+ if (EXTERNAL_AVX(cpu_flags)) {
+ init_lpf_funcs(BPC, avx);
+ }
+
+ if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+#if HAVE_AVX2_EXTERNAL
+ init_subpel3_32_64(0, put, BPC, avx2);
+ init_subpel3_32_64(1, avg, BPC, avx2);
+ init_subpel2(2, 0, 16, put, BPC, avx2);
+ init_subpel2(2, 1, 16, avg, BPC, avx2);
+#endif
+ }
+
+#endif /* HAVE_YASM */
+
+ ff_vp9dsp_init_16bpp_x86(dsp);
+}
diff --git a/media/ffvpx/libavcodec/x86/vp9intrapred.asm b/media/ffvpx/libavcodec/x86/vp9intrapred.asm
new file mode 100644
index 000000000..31f7d449f
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp9intrapred.asm
@@ -0,0 +1,2044 @@
+;******************************************************************************
+;* VP9 Intra prediction SIMD optimizations
+;*
+;* Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com>
+;*
+;* Parts based on:
+;* H.264 intra prediction asm optimizations
+;* Copyright (c) 2010 Fiona Glaser
+;* Copyright (c) 2010 Holger Lubitz
+;* Copyright (c) 2010 Loren Merritt
+;* Copyright (c) 2010 Ronald S. Bultje
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA 32
+
+pw_m256: times 16 dw -256
+pw_m255: times 16 dw -255
+pw_4096: times 8 dw 4096
+
+pb_4x3_4x2_4x1_4x0: times 4 db 3
+ times 4 db 2
+ times 4 db 1
+ times 4 db 0
+pb_8x1_8x0: times 8 db 1
+ times 8 db 0
+pb_8x3_8x2: times 8 db 3
+ times 8 db 2
+pb_0to5_2x7: db 0, 1, 2, 3, 4, 5, 7, 7
+ times 8 db -1
+pb_0to6_9x7: db 0, 1, 2, 3, 4, 5, 6
+ times 9 db 7
+pb_1to6_10x7: db 1, 2, 3, 4, 5, 6
+ times 10 db 7
+pb_2to6_3x7:
+pb_2to6_11x7: db 2, 3, 4, 5, 6
+ times 11 db 7
+pb_1toE_2xF: db 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15
+pb_2toE_3xF: db 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, 15
+pb_13456_3xm1: db 1, 3, 4, 5, 6
+ times 3 db -1
+pb_6012_4xm1: db 6, 0, 1, 2
+ times 4 db -1
+pb_6xm1_246_8toE: times 6 db -1
+ db 2, 4, 6, 8, 9, 10, 11, 12, 13, 14
+pb_6xm1_BDF_0to6: times 6 db -1
+ db 11, 13, 15, 0, 1, 2, 3, 4, 5, 6
+pb_02468ACE_13579BDF: db 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15
+
+pb_15x0_1xm1: times 15 db 0
+ db -1
+pb_0to2_5x3: db 0, 1, 2
+ times 5 db 3
+pb_6xm1_2x0: times 6 db -1
+ times 2 db 0
+pb_6x0_2xm1: times 6 db 0
+ times 2 db -1
+
+cextern pb_1
+cextern pb_2
+cextern pb_3
+cextern pb_15
+cextern pw_2
+cextern pw_4
+cextern pw_8
+cextern pw_16
+cextern pw_32
+cextern pw_255
+cextern pw_512
+cextern pw_1024
+cextern pw_2048
+cextern pw_8192
+
+SECTION .text
+
+; dc_NxN(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a)
+
+%macro DC_4to8_FUNCS 0
+cglobal vp9_ipred_dc_4x4, 4, 4, 0, dst, stride, l, a
+ movd m0, [lq]
+ punpckldq m0, [aq]
+ pxor m1, m1
+ psadbw m0, m1
+%if cpuflag(ssse3)
+ pmulhrsw m0, [pw_4096]
+ pshufb m0, m1
+%else
+ paddw m0, [pw_4]
+ psraw m0, 3
+ punpcklbw m0, m0
+ pshufw m0, m0, q0000
+%endif
+ movd [dstq+strideq*0], m0
+ movd [dstq+strideq*1], m0
+ lea dstq, [dstq+strideq*2]
+ movd [dstq+strideq*0], m0
+ movd [dstq+strideq*1], m0
+ RET
+
+cglobal vp9_ipred_dc_8x8, 4, 4, 0, dst, stride, l, a
+ movq m0, [lq]
+ movq m1, [aq]
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+ pxor m2, m2
+ psadbw m0, m2
+ psadbw m1, m2
+ paddw m0, m1
+%if cpuflag(ssse3)
+ pmulhrsw m0, [pw_2048]
+ pshufb m0, m2
+%else
+ paddw m0, [pw_8]
+ psraw m0, 4
+ punpcklbw m0, m0
+ pshufw m0, m0, q0000
+%endif
+ movq [dstq+strideq*0], m0
+ movq [dstq+strideq*1], m0
+ movq [dstq+strideq*2], m0
+ movq [dstq+stride3q ], m0
+ lea dstq, [dstq+strideq*4]
+ movq [dstq+strideq*0], m0
+ movq [dstq+strideq*1], m0
+ movq [dstq+strideq*2], m0
+ movq [dstq+stride3q ], m0
+ RET
+%endmacro
+
+INIT_MMX mmxext
+DC_4to8_FUNCS
+INIT_MMX ssse3
+DC_4to8_FUNCS
+
+%macro DC_16to32_FUNCS 0
+cglobal vp9_ipred_dc_16x16, 4, 4, 3, dst, stride, l, a
+ mova m0, [lq]
+ mova m1, [aq]
+ DEFINE_ARGS dst, stride, stride3, cnt
+ lea stride3q, [strideq*3]
+ pxor m2, m2
+ psadbw m0, m2
+ psadbw m1, m2
+ paddw m0, m1
+ movhlps m1, m0
+ paddw m0, m1
+%if cpuflag(ssse3)
+ pmulhrsw m0, [pw_1024]
+ pshufb m0, m2
+%else
+ paddw m0, [pw_16]
+ psraw m0, 5
+ punpcklbw m0, m0
+ pshuflw m0, m0, q0000
+ punpcklqdq m0, m0
+%endif
+ mov cntd, 4
+.loop:
+ mova [dstq+strideq*0], m0
+ mova [dstq+strideq*1], m0
+ mova [dstq+strideq*2], m0
+ mova [dstq+stride3q ], m0
+ lea dstq, [dstq+strideq*4]
+ dec cntd
+ jg .loop
+ RET
+
+cglobal vp9_ipred_dc_32x32, 4, 4, 5, dst, stride, l, a
+ mova m0, [lq]
+ mova m1, [lq+16]
+ mova m2, [aq]
+ mova m3, [aq+16]
+ DEFINE_ARGS dst, stride, stride3, cnt
+ lea stride3q, [strideq*3]
+ pxor m4, m4
+ psadbw m0, m4
+ psadbw m1, m4
+ psadbw m2, m4
+ psadbw m3, m4
+ paddw m0, m1
+ paddw m2, m3
+ paddw m0, m2
+ movhlps m1, m0
+ paddw m0, m1
+%if cpuflag(ssse3)
+ pmulhrsw m0, [pw_512]
+ pshufb m0, m4
+%else
+ paddw m0, [pw_32]
+ psraw m0, 6
+ punpcklbw m0, m0
+ pshuflw m0, m0, q0000
+ punpcklqdq m0, m0
+%endif
+ mov cntd, 8
+.loop:
+ mova [dstq+strideq*0+ 0], m0
+ mova [dstq+strideq*0+16], m0
+ mova [dstq+strideq*1+ 0], m0
+ mova [dstq+strideq*1+16], m0
+ mova [dstq+strideq*2+ 0], m0
+ mova [dstq+strideq*2+16], m0
+ mova [dstq+stride3q + 0], m0
+ mova [dstq+stride3q +16], m0
+ lea dstq, [dstq+strideq*4]
+ dec cntd
+ jg .loop
+ RET
+%endmacro
+
+INIT_XMM sse2
+DC_16to32_FUNCS
+INIT_XMM ssse3
+DC_16to32_FUNCS
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+cglobal vp9_ipred_dc_32x32, 4, 4, 3, dst, stride, l, a
+ mova m0, [lq]
+ mova m1, [aq]
+ DEFINE_ARGS dst, stride, stride3, cnt
+ lea stride3q, [strideq*3]
+ pxor m2, m2
+ psadbw m0, m2
+ psadbw m1, m2
+ paddw m0, m1
+ vextracti128 xm1, m0, 1
+ paddw xm0, xm1
+ movhlps xm1, xm0
+ paddw xm0, xm1
+ pmulhrsw xm0, [pw_512]
+ vpbroadcastb m0, xm0
+ mov cntd, 4
+.loop:
+ mova [dstq+strideq*0], m0
+ mova [dstq+strideq*1], m0
+ mova [dstq+strideq*2], m0
+ mova [dstq+stride3q ], m0
+ lea dstq, [dstq+strideq*4]
+ mova [dstq+strideq*0], m0
+ mova [dstq+strideq*1], m0
+ mova [dstq+strideq*2], m0
+ mova [dstq+stride3q ], m0
+ lea dstq, [dstq+strideq*4]
+ dec cntd
+ jg .loop
+ RET
+%endif
+
+; dc_top/left_NxN(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a)
+
+%macro DC_1D_4to8_FUNCS 2 ; dir (top or left), arg (a or l)
+cglobal vp9_ipred_dc_%1_4x4, 4, 4, 0, dst, stride, l, a
+ movd m0, [%2q]
+ pxor m1, m1
+ psadbw m0, m1
+%if cpuflag(ssse3)
+ pmulhrsw m0, [pw_8192]
+ pshufb m0, m1
+%else
+ paddw m0, [pw_2]
+ psraw m0, 2
+ punpcklbw m0, m0
+ pshufw m0, m0, q0000
+%endif
+ movd [dstq+strideq*0], m0
+ movd [dstq+strideq*1], m0
+ lea dstq, [dstq+strideq*2]
+ movd [dstq+strideq*0], m0
+ movd [dstq+strideq*1], m0
+ RET
+
+cglobal vp9_ipred_dc_%1_8x8, 4, 4, 0, dst, stride, l, a
+ movq m0, [%2q]
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+ pxor m1, m1
+ psadbw m0, m1
+%if cpuflag(ssse3)
+ pmulhrsw m0, [pw_4096]
+ pshufb m0, m1
+%else
+ paddw m0, [pw_4]
+ psraw m0, 3
+ punpcklbw m0, m0
+ pshufw m0, m0, q0000
+%endif
+ movq [dstq+strideq*0], m0
+ movq [dstq+strideq*1], m0
+ movq [dstq+strideq*2], m0
+ movq [dstq+stride3q ], m0
+ lea dstq, [dstq+strideq*4]
+ movq [dstq+strideq*0], m0
+ movq [dstq+strideq*1], m0
+ movq [dstq+strideq*2], m0
+ movq [dstq+stride3q ], m0
+ RET
+%endmacro
+
+INIT_MMX mmxext
+DC_1D_4to8_FUNCS top, a
+DC_1D_4to8_FUNCS left, l
+INIT_MMX ssse3
+DC_1D_4to8_FUNCS top, a
+DC_1D_4to8_FUNCS left, l
+
+%macro DC_1D_16to32_FUNCS 2; dir (top or left), arg (a or l)
+cglobal vp9_ipred_dc_%1_16x16, 4, 4, 3, dst, stride, l, a
+ mova m0, [%2q]
+ DEFINE_ARGS dst, stride, stride3, cnt
+ lea stride3q, [strideq*3]
+ pxor m2, m2
+ psadbw m0, m2
+ movhlps m1, m0
+ paddw m0, m1
+%if cpuflag(ssse3)
+ pmulhrsw m0, [pw_2048]
+ pshufb m0, m2
+%else
+ paddw m0, [pw_8]
+ psraw m0, 4
+ punpcklbw m0, m0
+ pshuflw m0, m0, q0000
+ punpcklqdq m0, m0
+%endif
+ mov cntd, 4
+.loop:
+ mova [dstq+strideq*0], m0
+ mova [dstq+strideq*1], m0
+ mova [dstq+strideq*2], m0
+ mova [dstq+stride3q ], m0
+ lea dstq, [dstq+strideq*4]
+ dec cntd
+ jg .loop
+ RET
+
+cglobal vp9_ipred_dc_%1_32x32, 4, 4, 3, dst, stride, l, a
+ mova m0, [%2q]
+ mova m1, [%2q+16]
+ DEFINE_ARGS dst, stride, stride3, cnt
+ lea stride3q, [strideq*3]
+ pxor m2, m2
+ psadbw m0, m2
+ psadbw m1, m2
+ paddw m0, m1
+ movhlps m1, m0
+ paddw m0, m1
+%if cpuflag(ssse3)
+ pmulhrsw m0, [pw_1024]
+ pshufb m0, m2
+%else
+ paddw m0, [pw_16]
+ psraw m0, 5
+ punpcklbw m0, m0
+ pshuflw m0, m0, q0000
+ punpcklqdq m0, m0
+%endif
+ mov cntd, 8
+.loop:
+ mova [dstq+strideq*0+ 0], m0
+ mova [dstq+strideq*0+16], m0
+ mova [dstq+strideq*1+ 0], m0
+ mova [dstq+strideq*1+16], m0
+ mova [dstq+strideq*2+ 0], m0
+ mova [dstq+strideq*2+16], m0
+ mova [dstq+stride3q + 0], m0
+ mova [dstq+stride3q +16], m0
+ lea dstq, [dstq+strideq*4]
+ dec cntd
+ jg .loop
+ RET
+%endmacro
+
+INIT_XMM sse2
+DC_1D_16to32_FUNCS top, a
+DC_1D_16to32_FUNCS left, l
+INIT_XMM ssse3
+DC_1D_16to32_FUNCS top, a
+DC_1D_16to32_FUNCS left, l
+
+%macro DC_1D_AVX2_FUNCS 2 ; dir (top or left), arg (a or l)
+%if HAVE_AVX2_EXTERNAL
+cglobal vp9_ipred_dc_%1_32x32, 4, 4, 3, dst, stride, l, a
+ mova m0, [%2q]
+ DEFINE_ARGS dst, stride, stride3, cnt
+ lea stride3q, [strideq*3]
+ pxor m2, m2
+ psadbw m0, m2
+ vextracti128 xm1, m0, 1
+ paddw xm0, xm1
+ movhlps xm1, xm0
+ paddw xm0, xm1
+ pmulhrsw xm0, [pw_1024]
+ vpbroadcastb m0, xm0
+ mov cntd, 4
+.loop:
+ mova [dstq+strideq*0], m0
+ mova [dstq+strideq*1], m0
+ mova [dstq+strideq*2], m0
+ mova [dstq+stride3q ], m0
+ lea dstq, [dstq+strideq*4]
+ mova [dstq+strideq*0], m0
+ mova [dstq+strideq*1], m0
+ mova [dstq+strideq*2], m0
+ mova [dstq+stride3q ], m0
+ lea dstq, [dstq+strideq*4]
+ dec cntd
+ jg .loop
+ RET
+%endif
+%endmacro
+
+INIT_YMM avx2
+DC_1D_AVX2_FUNCS top, a
+DC_1D_AVX2_FUNCS left, l
+
+; v
+
+INIT_MMX mmx
+cglobal vp9_ipred_v_8x8, 4, 4, 0, dst, stride, l, a
+ movq m0, [aq]
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+ movq [dstq+strideq*0], m0
+ movq [dstq+strideq*1], m0
+ movq [dstq+strideq*2], m0
+ movq [dstq+stride3q ], m0
+ lea dstq, [dstq+strideq*4]
+ movq [dstq+strideq*0], m0
+ movq [dstq+strideq*1], m0
+ movq [dstq+strideq*2], m0
+ movq [dstq+stride3q ], m0
+ RET
+
+INIT_XMM sse
+cglobal vp9_ipred_v_16x16, 4, 4, 1, dst, stride, l, a
+ mova m0, [aq]
+ DEFINE_ARGS dst, stride, stride3, cnt
+ lea stride3q, [strideq*3]
+ mov cntd, 4
+.loop:
+ mova [dstq+strideq*0], m0
+ mova [dstq+strideq*1], m0
+ mova [dstq+strideq*2], m0
+ mova [dstq+stride3q ], m0
+ lea dstq, [dstq+strideq*4]
+ dec cntd
+ jg .loop
+ RET
+
+INIT_XMM sse
+cglobal vp9_ipred_v_32x32, 4, 4, 2, dst, stride, l, a
+ mova m0, [aq]
+ mova m1, [aq+16]
+ DEFINE_ARGS dst, stride, stride3, cnt
+ lea stride3q, [strideq*3]
+ mov cntd, 8
+.loop:
+ mova [dstq+strideq*0+ 0], m0
+ mova [dstq+strideq*0+16], m1
+ mova [dstq+strideq*1+ 0], m0
+ mova [dstq+strideq*1+16], m1
+ mova [dstq+strideq*2+ 0], m0
+ mova [dstq+strideq*2+16], m1
+ mova [dstq+stride3q + 0], m0
+ mova [dstq+stride3q +16], m1
+ lea dstq, [dstq+strideq*4]
+ dec cntd
+ jg .loop
+ RET
+
+INIT_YMM avx
+cglobal vp9_ipred_v_32x32, 4, 4, 1, dst, stride, l, a
+ mova m0, [aq]
+ DEFINE_ARGS dst, stride, stride3, cnt
+ lea stride3q, [strideq*3]
+ mov cntd, 4
+.loop:
+ mova [dstq+strideq*0], m0
+ mova [dstq+strideq*1], m0
+ mova [dstq+strideq*2], m0
+ mova [dstq+stride3q ], m0
+ lea dstq, [dstq+strideq*4]
+ mova [dstq+strideq*0], m0
+ mova [dstq+strideq*1], m0
+ mova [dstq+strideq*2], m0
+ mova [dstq+stride3q ], m0
+ lea dstq, [dstq+strideq*4]
+ dec cntd
+ jg .loop
+ RET
+
+; h
+
+%macro H_XMM_FUNCS 2
+%if notcpuflag(avx)
+cglobal vp9_ipred_h_4x4, 3, 4, 1, dst, stride, l, stride3
+ movd m0, [lq]
+%if cpuflag(ssse3)
+ pshufb m0, [pb_4x3_4x2_4x1_4x0]
+%else
+ punpcklbw m0, m0
+ pshuflw m0, m0, q0123
+ punpcklwd m0, m0
+%endif
+ lea stride3q, [strideq*3]
+ movd [dstq+strideq*0], m0
+ psrldq m0, 4
+ movd [dstq+strideq*1], m0
+ psrldq m0, 4
+ movd [dstq+strideq*2], m0
+ psrldq m0, 4
+ movd [dstq+stride3q ], m0
+ RET
+%endif
+
+cglobal vp9_ipred_h_8x8, 3, 5, %1, dst, stride, l, stride3, cnt
+%if cpuflag(ssse3)
+ mova m2, [pb_8x1_8x0]
+ mova m3, [pb_8x3_8x2]
+%endif
+ lea stride3q, [strideq*3]
+ mov cntq, 1
+.loop:
+ movd m0, [lq+cntq*4]
+%if cpuflag(ssse3)
+ pshufb m1, m0, m3
+ pshufb m0, m2
+%else
+ punpcklbw m0, m0
+ punpcklwd m0, m0
+ pshufd m1, m0, q2233
+ pshufd m0, m0, q0011
+%endif
+ movq [dstq+strideq*0], m1
+ movhps [dstq+strideq*1], m1
+ movq [dstq+strideq*2], m0
+ movhps [dstq+stride3q ], m0
+ lea dstq, [dstq+strideq*4]
+ dec cntq
+ jge .loop
+ RET
+
+cglobal vp9_ipred_h_16x16, 3, 5, %2, dst, stride, l, stride3, cnt
+%if cpuflag(ssse3)
+ mova m5, [pb_1]
+ mova m6, [pb_2]
+ mova m7, [pb_3]
+ pxor m4, m4
+%endif
+ lea stride3q, [strideq*3]
+ mov cntq, 3
+.loop:
+ movd m3, [lq+cntq*4]
+%if cpuflag(ssse3)
+ pshufb m0, m3, m7
+ pshufb m1, m3, m6
+%else
+ punpcklbw m3, m3
+ punpcklwd m3, m3
+ pshufd m0, m3, q3333
+ pshufd m1, m3, q2222
+%endif
+ mova [dstq+strideq*0], m0
+ mova [dstq+strideq*1], m1
+%if cpuflag(ssse3)
+ pshufb m2, m3, m5
+ pshufb m3, m4
+%else
+ pshufd m2, m3, q1111
+ pshufd m3, m3, q0000
+%endif
+ mova [dstq+strideq*2], m2
+ mova [dstq+stride3q ], m3
+ lea dstq, [dstq+strideq*4]
+ dec cntq
+ jge .loop
+ RET
+
+cglobal vp9_ipred_h_32x32, 3, 5, %2, dst, stride, l, stride3, cnt
+%if cpuflag(ssse3)
+ mova m5, [pb_1]
+ mova m6, [pb_2]
+ mova m7, [pb_3]
+ pxor m4, m4
+%endif
+ lea stride3q, [strideq*3]
+ mov cntq, 7
+.loop:
+ movd m3, [lq+cntq*4]
+%if cpuflag(ssse3)
+ pshufb m0, m3, m7
+ pshufb m1, m3, m6
+%else
+ punpcklbw m3, m3
+ punpcklwd m3, m3
+ pshufd m0, m3, q3333
+ pshufd m1, m3, q2222
+%endif
+ mova [dstq+strideq*0+ 0], m0
+ mova [dstq+strideq*0+16], m0
+ mova [dstq+strideq*1+ 0], m1
+ mova [dstq+strideq*1+16], m1
+%if cpuflag(ssse3)
+ pshufb m2, m3, m5
+ pshufb m3, m4
+%else
+ pshufd m2, m3, q1111
+ pshufd m3, m3, q0000
+%endif
+ mova [dstq+strideq*2+ 0], m2
+ mova [dstq+strideq*2+16], m2
+ mova [dstq+stride3q + 0], m3
+ mova [dstq+stride3q +16], m3
+ lea dstq, [dstq+strideq*4]
+ dec cntq
+ jge .loop
+ RET
+%endmacro
+
+INIT_XMM sse2
+H_XMM_FUNCS 2, 4
+INIT_XMM ssse3
+H_XMM_FUNCS 4, 8
+INIT_XMM avx
+H_XMM_FUNCS 4, 8
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+cglobal vp9_ipred_h_32x32, 3, 5, 8, dst, stride, l, stride3, cnt
+ mova m5, [pb_1]
+ mova m6, [pb_2]
+ mova m7, [pb_3]
+ pxor m4, m4
+ lea stride3q, [strideq*3]
+ mov cntq, 7
+.loop:
+ movd xm3, [lq+cntq*4]
+ vinserti128 m3, m3, xm3, 1
+ pshufb m0, m3, m7
+ pshufb m1, m3, m6
+ mova [dstq+strideq*0], m0
+ mova [dstq+strideq*1], m1
+ pshufb m2, m3, m5
+ pshufb m3, m4
+ mova [dstq+strideq*2], m2
+ mova [dstq+stride3q ], m3
+ lea dstq, [dstq+strideq*4]
+ dec cntq
+ jge .loop
+ RET
+%endif
+
+; tm
+
+%macro TM_MMX_FUNCS 0
+cglobal vp9_ipred_tm_4x4, 4, 4, 0, dst, stride, l, a
+ pxor m1, m1
+ movd m0, [aq]
+ pinsrw m2, [aq-1], 0
+ punpcklbw m0, m1
+ DEFINE_ARGS dst, stride, l, cnt
+%if cpuflag(ssse3)
+ mova m3, [pw_m256]
+ mova m1, [pw_m255]
+ pshufb m2, m3
+%else
+ punpcklbw m2, m1
+ pshufw m2, m2, q0000
+%endif
+ psubw m0, m2
+ mov cntq, 1
+.loop:
+ pinsrw m2, [lq+cntq*2], 0
+%if cpuflag(ssse3)
+ pshufb m4, m2, m1
+ pshufb m2, m3
+%else
+ punpcklbw m2, m1
+ pshufw m4, m2, q1111
+ pshufw m2, m2, q0000
+%endif
+ paddw m4, m0
+ paddw m2, m0
+ packuswb m4, m4
+ packuswb m2, m2
+ movd [dstq+strideq*0], m4
+ movd [dstq+strideq*1], m2
+ lea dstq, [dstq+strideq*2]
+ dec cntq
+ jge .loop
+ RET
+%endmacro
+
+INIT_MMX mmxext
+TM_MMX_FUNCS
+INIT_MMX ssse3
+TM_MMX_FUNCS
+
+%macro TM_XMM_FUNCS 0 ; TrueMotion 8x8/16x16/32x32: dst[y][x] = clip(top[x] - topleft + left[y])
+cglobal vp9_ipred_tm_8x8, 4, 4, 5, dst, stride, l, a
+ pxor m1, m1
+ movh m0, [aq] ; 8 top pixels
+ pinsrw m2, [aq-1], 0 ; word = { topleft, top[0] }
+ punpcklbw m0, m1 ; zero-extend top to words
+ DEFINE_ARGS dst, stride, l, cnt
+%if cpuflag(ssse3)
+ mova m3, [pw_m256] ; pshufb mask: splat byte 0 of word 0
+ mova m1, [pw_m255] ; pshufb mask: splat byte 1 of word 0
+ pshufb m2, m3 ; splat topleft
+%else
+ punpcklbw m2, m1
+ punpcklwd m2, m2
+ pshufd m2, m2, q0000 ; splat topleft
+%endif
+ psubw m0, m2 ; top - topleft
+ mov cntq, 3 ; 4 iterations x 2 rows; l[] read back-to-front
+.loop:
+ pinsrw m2, [lq+cntq*2], 0 ; two left pixels
+%if cpuflag(ssse3)
+ pshufb m4, m2, m1 ; splat upper left byte
+ pshufb m2, m3 ; splat lower left byte
+%else
+ punpcklbw m2, m1
+ punpcklwd m2, m2
+ pshufd m4, m2, q1111
+ pshufd m2, m2, q0000
+%endif
+ paddw m4, m0 ; left + (top - topleft)
+ paddw m2, m0
+ packuswb m4, m2 ; saturate; both 8-pixel rows in one register
+ movh [dstq+strideq*0], m4
+ movhps [dstq+strideq*1], m4
+ lea dstq, [dstq+strideq*2]
+ dec cntq
+ jge .loop
+ RET
+
+cglobal vp9_ipred_tm_16x16, 4, 4, 8, dst, stride, l, a
+ pxor m3, m3
+ mova m0, [aq] ; 16 top pixels
+ pinsrw m2, [aq-1], 0 ; word = { topleft, top[0] }
+ punpckhbw m1, m0, m3 ; top as words, high half
+ punpcklbw m0, m3 ; top as words, low half
+ DEFINE_ARGS dst, stride, l, cnt
+%if cpuflag(ssse3)
+ mova m4, [pw_m256] ; splat-byte-0 pshufb mask
+ mova m3, [pw_m255] ; splat-byte-1 pshufb mask
+ pshufb m2, m4 ; splat topleft
+%else
+ punpcklbw m2, m3
+ punpcklwd m2, m2
+ pshufd m2, m2, q0000
+%endif
+ psubw m1, m2 ; top - topleft, both halves
+ psubw m0, m2
+ mov cntq, 7 ; 8 iterations x 2 rows
+.loop:
+ pinsrw m7, [lq+cntq*2], 0 ; two left pixels
+%if cpuflag(ssse3)
+ pshufb m5, m7, m3 ; splat upper left byte
+ pshufb m7, m4 ; splat lower left byte
+%else
+ punpcklbw m7, m3
+ punpcklwd m7, m7
+ pshufd m5, m7, q1111
+ pshufd m7, m7, q0000
+%endif
+ paddw m2, m5, m0 ; row A: low + high halves
+ paddw m5, m1
+ paddw m6, m7, m0 ; row B
+ paddw m7, m1
+ packuswb m2, m5 ; saturate to bytes
+ packuswb m6, m7
+ mova [dstq+strideq*0], m2
+ mova [dstq+strideq*1], m6
+ lea dstq, [dstq+strideq*2]
+ dec cntq
+ jge .loop
+ RET
+
+%if ARCH_X86_64 ; scratch: row diffs live in m8-m11 on x86-64, in 64B of stack on x86-32
+%define mem 0
+%else
+%define mem 64
+%endif
+cglobal vp9_ipred_tm_32x32, 4, 4, 14, mem, dst, stride, l, a
+ pxor m5, m5
+ pinsrw m4, [aq-1], 0 ; topleft
+ mova m0, [aq] ; top[0..15]
+ mova m2, [aq+16] ; top[16..31]
+ DEFINE_ARGS dst, stride, l, cnt
+%if cpuflag(ssse3)
+%if ARCH_X86_64 ; keep pshufb masks in registers when we have enough of them
+ mova m12, [pw_m256]
+ mova m13, [pw_m255]
+%define pw_m256_reg m12
+%define pw_m255_reg m13
+%else
+%define pw_m256_reg [pw_m256]
+%define pw_m255_reg [pw_m255]
+%endif
+ pshufb m4, pw_m256_reg ; splat topleft
+%else
+ punpcklbw m4, m5
+ punpcklwd m4, m4
+ pshufd m4, m4, q0000
+%endif
+ punpckhbw m1, m0, m5 ; top as 4 word vectors
+ punpckhbw m3, m2, m5
+ punpcklbw m0, m5
+ punpcklbw m2, m5
+ psubw m1, m4 ; top - topleft
+ psubw m0, m4
+ psubw m3, m4
+ psubw m2, m4
+%if ARCH_X86_64
+ SWAP 0, 8 ; park the 4 difference vectors in m8-m11
+ SWAP 1, 9
+ SWAP 2, 10
+ SWAP 3, 11
+%else
+ mova [rsp+0*16], m0 ; ... or spill them to the stack on x86-32
+ mova [rsp+1*16], m1
+ mova [rsp+2*16], m2
+ mova [rsp+3*16], m3
+%endif
+ mov cntq, 15 ; 16 iterations x 2 rows
+.loop:
+ pinsrw m3, [lq+cntq*2], 0 ; two left pixels
+%if cpuflag(ssse3)
+ pshufb m7, m3, pw_m255_reg ; splat upper left byte
+ pshufb m3, pw_m256_reg ; splat lower left byte
+%else
+ pxor m7, m7
+ punpcklbw m3, m7
+ punpcklwd m3, m3
+ pshufd m7, m3, q1111
+ pshufd m3, m3, q0000
+%endif
+%if ARCH_X86_64
+ paddw m4, m7, m8 ; row A = left + diffs (4 vectors = 32 pixels)
+ paddw m5, m7, m9
+ paddw m6, m7, m10
+ paddw m7, m11
+ paddw m0, m3, m8 ; row B
+ paddw m1, m3, m9
+ paddw m2, m3, m10
+ paddw m3, m11
+%else
+ paddw m4, m7, [rsp+0*16]
+ paddw m5, m7, [rsp+1*16]
+ paddw m6, m7, [rsp+2*16]
+ paddw m7, [rsp+3*16]
+ paddw m0, m3, [rsp+0*16]
+ paddw m1, m3, [rsp+1*16]
+ paddw m2, m3, [rsp+2*16]
+ paddw m3, [rsp+3*16]
+%endif
+ packuswb m4, m5 ; saturate to bytes
+ packuswb m6, m7
+ packuswb m0, m1
+ packuswb m2, m3
+ mova [dstq+strideq*0+ 0], m4
+ mova [dstq+strideq*0+16], m6
+ mova [dstq+strideq*1+ 0], m0
+ mova [dstq+strideq*1+16], m2
+ lea dstq, [dstq+strideq*2]
+ dec cntq
+ jge .loop
+ RET
+%undef pw_m256_reg
+%undef pw_m255_reg
+%undef mem
+%endmacro
+
+INIT_XMM sse2
+TM_XMM_FUNCS
+INIT_XMM ssse3
+TM_XMM_FUNCS
+INIT_XMM avx
+TM_XMM_FUNCS
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+cglobal vp9_ipred_tm_32x32, 4, 4, 8, dst, stride, l, a ; AVX2 TrueMotion: one 32-pixel row per ymm pair
+ pxor m3, m3
+ pinsrw xm2, [aq-1], 0 ; topleft
+ vinserti128 m2, m2, xm2, 1 ; duplicate into both lanes
+ mova m0, [aq] ; 32 top pixels
+ DEFINE_ARGS dst, stride, l, cnt
+ mova m4, [pw_m256] ; pshufb mask: splat byte 0 per word
+ mova m5, [pw_m255] ; pshufb mask: splat byte 1 per word
+ pshufb m2, m4 ; splat topleft
+ punpckhbw m1, m0, m3 ; top as words
+ punpcklbw m0, m3
+ psubw m1, m2 ; top - topleft
+ psubw m0, m2
+ mov cntq, 15 ; 16 iterations x 2 rows; l[] read back-to-front
+.loop:
+ pinsrw xm7, [lq+cntq*2], 0 ; two left pixels
+ vinserti128 m7, m7, xm7, 1
+ pshufb m3, m7, m5 ; splat upper left byte
+ pshufb m7, m4 ; splat lower left byte
+ paddw m2, m3, m0 ; row A = left + (top - topleft)
+ paddw m3, m1
+ paddw m6, m7, m0 ; row B
+ paddw m7, m1
+ packuswb m2, m3 ; saturate to bytes
+ packuswb m6, m7
+ mova [dstq+strideq*0], m2
+ mova [dstq+strideq*1], m6
+ lea dstq, [dstq+strideq*2]
+ dec cntq
+ jge .loop
+ RET
+%endif
+
+; dl
+
+%macro LOWPASS 4 ; left [dst], center, right, tmp -- per-byte 3-tap smooth: %1 = (%1 + 2*%2 + %3 + 2) >> 2
+ pxor m%4, m%1, m%3 ; tmp = l ^ r
+ pand m%4, [pb_1] ; low bit = carry that pavgb's round-up would add
+ pavgb m%1, m%3 ; (l + r + 1) >> 1
+ psubusb m%1, m%4 ; correct to floor: (l + r) >> 1
+ pavgb m%1, m%2 ; ((l + r) >> 1 + c + 1) >> 1 == (l + 2c + r + 2) >> 2
+%endmacro
+
+%macro DL_MMX_FUNCS 0 ; down-left 4x4: filtered diagonal from the 8 top pixels
+cglobal vp9_ipred_dl_4x4, 4, 4, 0, dst, stride, l, a
+ movq m1, [aq] ; top[0..7]
+%if cpuflag(ssse3)
+ pshufb m0, m1, [pb_0to5_2x7] ; 01234577
+ pshufb m2, m1, [pb_2to6_3x7] ; 23456777
+%else
+ punpckhbw m3, m1, m1 ; 44556677
+ pand m0, m1, [pb_6xm1_2x0] ; 012345__
+ pand m3, [pb_6x0_2xm1] ; ______77
+ psrlq m2, m1, 16 ; 234567__
+ por m0, m3 ; 01234577
+ por m2, m3 ; 23456777
+%endif
+ psrlq m1, 8 ; 1234567_
+ LOWPASS 0, 1, 2, 3 ; m0 = smoothed diagonal
+
+ pshufw m1, m0, q3321 ; rows 2/3 = rows 0/1 advanced by 2 pixels
+ movd [dstq+strideq*0], m0
+ movd [dstq+strideq*2], m1
+ psrlq m0, 8 ; each successive row advances one pixel
+ psrlq m1, 8
+ add dstq, strideq
+ movd [dstq+strideq*0], m0
+ movd [dstq+strideq*2], m1
+ RET
+%endmacro
+
+INIT_MMX mmxext
+DL_MMX_FUNCS
+INIT_MMX ssse3
+DL_MMX_FUNCS
+
+%macro DL_XMM_FUNCS 0 ; down-left 8x8/16x16/32x32 (top-only filtered diagonal)
+cglobal vp9_ipred_dl_8x8, 4, 4, 4, dst, stride, stride5, a
+ movq m0, [aq] ; top[0..7]
+ lea stride5q, [strideq*5]
+%if cpuflag(ssse3)
+ pshufb m1, m0, [pb_1to6_10x7] ; shifted by 1, last pixel repeated
+%else
+ punpcklbw m1, m0, m0 ; 0011223344556677
+ punpckhwd m1, m1 ; 4x4,4x5,4x6,4x7
+%endif
+ shufps m0, m1, q3310
+%if notcpuflag(ssse3)
+ psrldq m1, m0, 1
+ shufps m1, m0, q3210
+%endif
+ psrldq m2, m1, 1 ; shifted by 2
+ LOWPASS 0, 1, 2, 3 ; m0 = smoothed diagonal
+
+ pshufd m1, m0, q3321 ; rows 4..7 start 4 pixels further along
+ movq [dstq+strideq*0], m0
+ movq [dstq+strideq*4], m1
+ psrldq m0, 1 ; advance one pixel per row
+ psrldq m1, 1
+ movq [dstq+strideq*1], m0
+ movq [dstq+stride5q ], m1
+ lea dstq, [dstq+strideq*2]
+ psrldq m0, 1
+ psrldq m1, 1
+ movq [dstq+strideq*0], m0
+ movq [dstq+strideq*4], m1
+ psrldq m0, 1
+ psrldq m1, 1
+ movq [dstq+strideq*1], m0
+ movq [dstq+stride5q ], m1
+ RET
+
+cglobal vp9_ipred_dl_16x16, 4, 4, 6, dst, stride, l, a
+ mova m0, [aq] ; top[0..15]
+%if cpuflag(ssse3)
+ mova m5, [pb_1toE_2xF] ; pshufb mask: shift by 1, repeat last pixel
+ pshufb m1, m0, m5
+ pshufb m2, m1, m5
+ pshufb m4, m0, [pb_15]
+%else
+ pand m5, m0, [pb_15x0_1xm1] ; _______________F
+ psrldq m1, m0, 1 ; 123456789ABCDEF_
+ por m1, m5 ; 123456789ABCDEFF
+ psrldq m2, m1, 1 ; 23456789ABCDEFF_
+ por m2, m5 ; 23456789ABCDEFFF
+ pshufhw m4, m1, q3333 ; xxxxxxxxFFFFFFFF
+%endif
+ LOWPASS 0, 1, 2, 3 ; m0 = smoothed diagonal
+ DEFINE_ARGS dst, stride, cnt, stride9
+ lea stride9q, [strideq+strideq*8]
+ mov cntd, 4 ; 4 iterations x 4 rows (n, n+1, n+8, n+9)
+
+.loop:
+ movhlps m4, m0 ; row n+8 = row n advanced 8 pixels
+ mova [dstq+strideq*0], m0
+%if cpuflag(ssse3)
+ pshufb m0, m5 ; advance 1 pixel, repeating the last
+%else
+ psrldq m0, 1
+ por m0, m5
+%endif
+ mova [dstq+strideq*8], m4
+ movhlps m4, m0
+ mova [dstq+strideq*1], m0
+%if cpuflag(ssse3)
+ pshufb m0, m5
+%else
+ psrldq m0, 1
+ por m0, m5
+%endif
+ mova [dstq+stride9q ], m4
+ lea dstq, [dstq+strideq*2]
+ dec cntd
+ jg .loop
+ RET
+
+cglobal vp9_ipred_dl_32x32, 4, 5, 8, dst, stride, cnt, a, dst16
+ mova m0, [aq] ; top[0..15]
+ mova m1, [aq+16] ; top[16..31]
+ PALIGNR m2, m1, m0, 1, m4
+ PALIGNR m3, m1, m0, 2, m4
+ LOWPASS 0, 2, 3, 4 ; smoothed diagonal, low half
+%if cpuflag(ssse3)
+ mova m5, [pb_1toE_2xF]
+ pshufb m2, m1, m5
+ pshufb m3, m2, m5
+ pshufb m6, m1, [pb_15] ; last top pixel splatted (right fill)
+ mova m7, m6
+%else
+ pand m5, m1, [pb_15x0_1xm1] ; _______________F
+ psrldq m2, m1, 1 ; 123456789ABCDEF_
+ por m2, m5 ; 123456789ABCDEFF
+ psrldq m3, m2, 1 ; 23456789ABCDEFF_
+ por m3, m5 ; 23456789ABCDEFFF
+ pshufhw m7, m2, q3333 ; xxxxxxxxFFFFFFFF
+ pshufd m6, m7, q3333
+%endif
+ LOWPASS 1, 2, 3, 4 ; smoothed diagonal, high half
+ lea dst16q, [dstq +strideq*8]
+ mov cntd, 8
+ lea dst16q, [dst16q+strideq*8] ; dst16 = row 16
+.loop:
+ movhlps m7, m1
+ mova [dstq +strideq*0+ 0], m0
+ mova [dstq +strideq*0+16], m1
+ movhps [dstq+strideq*8+ 0], m0 ; row n+8: diagonal advanced 8 pixels
+ movq [dstq +strideq*8+ 8], m1
+ mova [dstq +strideq*8+16], m7
+ mova [dst16q+strideq*0+ 0], m1 ; row n+16: advanced 16
+ mova [dst16q+strideq*0+16], m6
+ mova [dst16q+strideq*8+ 0], m7 ; row n+24: advanced 24
+ mova [dst16q+strideq*8+16], m6
+%if cpuflag(avx)
+ vpalignr m0, m1, m0, 1 ; advance the 32-pixel diagonal by 1
+ pshufb m1, m5
+%elif cpuflag(ssse3)
+ palignr m2, m1, m0, 1
+ pshufb m1, m5
+ mova m0, m2
+%else
+ mova m4, m1 ; manual palignr for sse2
+ psrldq m0, 1
+ pslldq m4, 15
+ psrldq m1, 1
+ por m0, m4
+ por m1, m5
+%endif
+ add dstq, strideq
+ add dst16q, strideq
+ dec cntd
+ jg .loop
+ RET
+%endmacro
+
+INIT_XMM sse2
+DL_XMM_FUNCS
+INIT_XMM ssse3
+DL_XMM_FUNCS
+INIT_XMM avx
+DL_XMM_FUNCS
+
+; dr
+
+%macro DR_MMX_FUNCS 0 ; down-right 4x4: filtered diagonal over left + topleft + top
+cglobal vp9_ipred_dr_4x4, 4, 4, 0, dst, stride, l, a
+ movd m0, [lq] ; left[0..3]
+ punpckldq m0, [aq-1] ; append topleft + top[0..2]
+ movd m1, [aq+3] ; top[3]
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+ PALIGNR m1, m0, 1, m3
+ psrlq m2, m1, 8
+ LOWPASS 0, 1, 2, 3 ; m0 = smoothed edge, bottom row first
+
+ movd [dstq+stride3q ], m0 ; each row up is the edge advanced by 1
+ psrlq m0, 8
+ movd [dstq+strideq*2], m0
+ psrlq m0, 8
+ movd [dstq+strideq*1], m0
+ psrlq m0, 8
+ movd [dstq+strideq*0], m0
+ RET
+%endmacro
+
+INIT_MMX mmxext
+DR_MMX_FUNCS
+INIT_MMX ssse3
+DR_MMX_FUNCS
+
+%macro DR_XMM_FUNCS 0 ; down-right 8x8/16x16/32x32 (left + topleft + top diagonal)
+cglobal vp9_ipred_dr_8x8, 4, 4, 4, dst, stride, l, a
+ movq m1, [lq] ; left[0..7]
+ movhps m1, [aq-1] ; + topleft, top[0..6]
+ movd m2, [aq+7] ; top[7]
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+ pslldq m0, m1, 1
+ PALIGNR m2, m1, 1, m3
+ LOWPASS 0, 1, 2, 3 ; m0 = smoothed left..topleft..top edge
+
+ movhps [dstq+strideq*0], m0 ; walk down the edge one pixel per row
+ pslldq m0, 1
+ movhps [dstq+strideq*1], m0
+ pslldq m0, 1
+ movhps [dstq+strideq*2], m0
+ pslldq m0, 1
+ movhps [dstq+stride3q ], m0
+ pslldq m0, 1
+ lea dstq, [dstq+strideq*4]
+ movhps [dstq+strideq*0], m0
+ pslldq m0, 1
+ movhps [dstq+strideq*1], m0
+ pslldq m0, 1
+ movhps [dstq+strideq*2], m0
+ pslldq m0, 1
+ movhps [dstq+stride3q ], m0
+ RET
+
+cglobal vp9_ipred_dr_16x16, 4, 4, 6, dst, stride, l, a
+ mova m1, [lq] ; left[0..15]
+ movu m2, [aq-1] ; topleft + top[0..14]
+ movd m4, [aq+15] ; top[15]
+ DEFINE_ARGS dst, stride, stride9, cnt
+ lea stride9q, [strideq *3]
+ mov cntd, 4
+ lea stride9q, [stride9q*3] ; stride9 = 9*stride
+ PALIGNR m4, m2, 1, m5
+ PALIGNR m3, m2, m1, 15, m5
+ LOWPASS 3, 2, 4, 5 ; m3 = smoothed top half of the edge
+ pslldq m0, m1, 1
+ PALIGNR m2, m1, 1, m4
+ LOWPASS 0, 1, 2, 4 ; m0 = smoothed left half
+
+.loop:
+ mova [dstq+strideq*0 ], m3
+ movhps [dstq+strideq*8+0], m0 ; row n+8: edge advanced 8 pixels
+ movq [dstq+strideq*8+8], m3
+ PALIGNR m3, m0, 15, m1 ; slide the 32-pixel edge down by 1
+ pslldq m0, 1
+ mova [dstq+strideq*1 ], m3
+ movhps [dstq+stride9q +0], m0
+ movq [dstq+stride9q +8], m3
+ PALIGNR m3, m0, 15, m1
+ pslldq m0, 1
+ lea dstq, [dstq+strideq*2]
+ dec cntd
+ jg .loop
+ RET
+
+cglobal vp9_ipred_dr_32x32, 4, 4, 8, dst, stride, l, a
+ mova m1, [lq] ; left[0..15]
+ mova m2, [lq+16] ; left[16..31]
+ movu m3, [aq-1] ; topleft + top[0..14]
+ movu m4, [aq+15]
+ movd m5, [aq+31]
+ DEFINE_ARGS dst, stride, stride8, cnt
+ lea stride8q, [strideq*8]
+ PALIGNR m5, m4, 1, m7 ; smooth the 64-pixel edge in 4 chunks: m2..m5
+ PALIGNR m6, m4, m3, 15, m7
+ LOWPASS 5, 4, 6, 7
+ PALIGNR m4, m3, 1, m7
+ PALIGNR m6, m3, m2, 15, m7
+ LOWPASS 4, 3, 6, 7
+ PALIGNR m3, m2, 1, m7
+ PALIGNR m6, m2, m1, 15, m7
+ LOWPASS 3, 2, 6, 7
+ PALIGNR m2, m1, 1, m6
+ pslldq m0, m1, 1
+ LOWPASS 2, 1, 0, 6
+ mov cntd, 16
+
+ ; out=m2/m3/m4/m5
+.loop:
+ mova [dstq+stride8q*0+ 0], m4
+ mova [dstq+stride8q*0+16], m5
+ mova [dstq+stride8q*2+ 0], m3 ; row n+16: edge advanced 16 pixels
+ mova [dstq+stride8q*2+16], m4
+ PALIGNR m5, m4, 15, m6 ; slide the edge down by 1
+ PALIGNR m4, m3, 15, m6
+ PALIGNR m3, m2, 15, m6
+ pslldq m2, 1
+ add dstq, strideq
+ dec cntd
+ jg .loop
+ RET
+%endmacro
+
+INIT_XMM sse2
+DR_XMM_FUNCS
+INIT_XMM ssse3
+DR_XMM_FUNCS
+INIT_XMM avx
+DR_XMM_FUNCS
+
+; vl
+
+INIT_MMX mmxext
+cglobal vp9_ipred_vl_4x4, 4, 4, 0, dst, stride, l, a ; vertical-left 4x4 (top-only)
+ movq m0, [aq] ; top[0..7]
+ psrlq m1, m0, 8
+ psrlq m2, m1, 8
+ LOWPASS 2, 1, 0, 3 ; odd rows: 3-tap filtered
+ pavgb m1, m0 ; even rows: 2-tap averaged
+ movd [dstq+strideq*0], m1
+ movd [dstq+strideq*1], m2
+ lea dstq, [dstq+strideq*2]
+ psrlq m1, 8 ; next row pair advances one pixel
+ psrlq m2, 8
+ movd [dstq+strideq*0], m1
+ movd [dstq+strideq*1], m2
+ RET
+
+%macro VL_XMM_FUNCS 0 ; vertical-left 8x8/16x16/32x32 (top-only)
+cglobal vp9_ipred_vl_8x8, 4, 4, 4, dst, stride, l, a
+ movq m0, [aq] ; top[0..7]
+%if cpuflag(ssse3)
+ pshufb m0, [pb_0to6_9x7] ; repeat last pixel to the right
+%else
+ punpcklbw m1, m0, m0
+ punpckhwd m1, m1
+ shufps m0, m1, q3310
+%endif
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+ psrldq m1, m0, 1
+ psrldq m2, m0, 2
+ LOWPASS 2, 1, 0, 3 ; odd rows: 3-tap filtered
+ pavgb m1, m0 ; even rows: 2-tap averaged
+
+ movq [dstq+strideq*0], m1
+ movq [dstq+strideq*1], m2
+ psrldq m1, 1 ; each row pair advances one pixel
+ psrldq m2, 1
+ movq [dstq+strideq*2], m1
+ movq [dstq+stride3q ], m2
+ lea dstq, [dstq+strideq*4]
+ psrldq m1, 1
+ psrldq m2, 1
+ movq [dstq+strideq*0], m1
+ movq [dstq+strideq*1], m2
+ psrldq m1, 1
+ psrldq m2, 1
+ movq [dstq+strideq*2], m1
+ movq [dstq+stride3q ], m2
+ RET
+
+cglobal vp9_ipred_vl_16x16, 4, 4, 5, dst, stride, l, a
+ mova m0, [aq] ; top[0..15]
+ DEFINE_ARGS dst, stride, stride3, cnt
+ lea stride3q, [strideq*3]
+%if cpuflag(ssse3)
+ mova m4, [pb_1toE_2xF] ; shift-by-1, repeat-last mask
+ pshufb m1, m0, m4
+ pshufb m2, m1, m4
+%else
+ pand m4, m0, [pb_15x0_1xm1] ; _______________F
+ psrldq m1, m0, 1 ; 123456789ABCDEF_
+ por m1, m4 ; 123456789ABCDEFF
+ psrldq m2, m1, 1 ; 23456789ABCDEFF_
+ por m2, m4 ; 23456789ABCDEFFF
+%endif
+ LOWPASS 2, 1, 0, 3 ; odd rows
+ pavgb m1, m0 ; even rows
+ mov cntd, 4
+.loop:
+ mova [dstq+strideq*0], m1
+ mova [dstq+strideq*1], m2
+%if cpuflag(ssse3)
+ pshufb m1, m4 ; advance one pixel, repeating the last
+ pshufb m2, m4
+%else
+ psrldq m1, 1
+ psrldq m2, 1
+ por m1, m4
+ por m2, m4
+%endif
+ mova [dstq+strideq*2], m1
+ mova [dstq+stride3q ], m2
+%if cpuflag(ssse3)
+ pshufb m1, m4
+ pshufb m2, m4
+%else
+ psrldq m1, 1
+ psrldq m2, 1
+ por m1, m4
+ por m2, m4
+%endif
+ lea dstq, [dstq+strideq*4]
+ dec cntd
+ jg .loop
+ RET
+
+cglobal vp9_ipred_vl_32x32, 4, 4, 7, dst, stride, l, a
+ mova m0, [aq] ; top[0..15]
+ mova m5, [aq+16] ; top[16..31]
+ DEFINE_ARGS dst, stride, dst16, cnt
+ PALIGNR m2, m5, m0, 1, m4
+ PALIGNR m3, m5, m0, 2, m4
+ lea dst16q, [dstq +strideq*8]
+ LOWPASS 3, 2, 0, 6 ; odd rows, low half
+ pavgb m2, m0 ; even rows, low half
+%if cpuflag(ssse3)
+ mova m4, [pb_1toE_2xF]
+ pshufb m0, m5, m4
+ pshufb m1, m0, m4
+%else
+ pand m4, m5, [pb_15x0_1xm1] ; _______________F
+ psrldq m0, m5, 1 ; 123456789ABCDEF_
+ por m0, m4 ; 123456789ABCDEFF
+ psrldq m1, m0, 1 ; 23456789ABCDEFF_
+ por m1, m4 ; 23456789ABCDEFFF
+%endif
+ lea dst16q, [dst16q+strideq*8] ; dst16 = row 16
+ LOWPASS 1, 0, 5, 6 ; odd rows, high half
+ pavgb m0, m5 ; even rows, high half
+%if cpuflag(ssse3)
+ pshufb m5, [pb_15] ; last top pixel splatted (right fill)
+%else
+ punpckhbw m5, m4, m4
+ pshufhw m5, m5, q3333
+ punpckhqdq m5, m5
+%endif
+ mov cntd, 8
+
+.loop:
+%macro %%write 3 ; stride offset, row-pair regs (low half, high half)
+ mova [dstq+stride%1+ 0], %2
+ mova [dstq+stride%1+16], %3
+ movhps [dst16q+stride%1 ], %2 ; row n+16 = row n advanced 8 pixels
+ movu [dst16q+stride%1+ 8], %3
+ movq [dst16q+stride%1+24], m5 ; right-edge fill
+%if cpuflag(avx)
+ palignr %2, %3, %2, 1 ; advance one pixel
+ pshufb %3, m4
+%elif cpuflag(ssse3)
+ palignr m6, %3, %2, 1
+ pshufb %3, m4
+ mova %2, m6
+%else
+ pslldq m6, %3, 15 ; manual palignr for sse2
+ psrldq %3, 1
+ psrldq %2, 1
+ por %3, m4
+ por %2, m6
+%endif
+%endmacro
+
+ %%write q*0, m2, m0
+ %%write q*1, m3, m1
+ lea dstq, [dstq +strideq*2]
+ lea dst16q, [dst16q+strideq*2]
+ dec cntd
+ jg .loop
+ RET
+%endmacro
+
+INIT_XMM sse2
+VL_XMM_FUNCS
+INIT_XMM ssse3
+VL_XMM_FUNCS
+INIT_XMM avx
+VL_XMM_FUNCS
+
+; vr
+
+%macro VR_MMX_FUNCS 0 ; vertical-right 4x4 (top + topleft + left)
+cglobal vp9_ipred_vr_4x4, 4, 4, 0, dst, stride, l, a
+ movq m1, [aq-1] ; topleft + top[0..6]
+ punpckldq m2, [lq] ; left[0..3] into the high dword (low half is don't-care)
+ movd m0, [aq] ; top[0..3]
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+ pavgb m0, m1 ; row 0 = avg(top[x-1], top[x])
+ PALIGNR m1, m2, 5, m3
+ psrlq m2, m1, 8
+ psllq m3, m1, 8
+ LOWPASS 2, 1, 3, 4 ; 3-tap filtered edge samples
+
+ ; ABCD <- for the following predictor:
+ ; EFGH
+ ; IABC | m0 contains ABCDxxxx
+ ; JEFG | m2 contains xJIEFGHx
+
+%if cpuflag(ssse3)
+ punpckldq m0, m2
+ pshufb m2, [pb_13456_3xm1]
+ movd [dstq+strideq*0], m0
+ pshufb m0, [pb_6012_4xm1]
+ movd [dstq+stride3q ], m2
+ psrlq m2, 8
+ movd [dstq+strideq*2], m0
+ movd [dstq+strideq*1], m2
+%else
+ psllq m1, m2, 40
+ psrlq m2, 24
+ movd [dstq+strideq*0], m0
+ movd [dstq+strideq*1], m2
+ PALIGNR m0, m1, 7, m3 ; shift in one left pixel per row pair
+ psllq m1, 8
+ PALIGNR m2, m1, 7, m3
+ movd [dstq+strideq*2], m0
+ movd [dstq+stride3q ], m2
+%endif
+ RET
+%endmacro
+
+INIT_MMX mmxext
+VR_MMX_FUNCS
+INIT_MMX ssse3
+VR_MMX_FUNCS
+
+%macro VR_XMM_FUNCS 1 ; n_xmm_regs for 16x16
+cglobal vp9_ipred_vr_8x8, 4, 4, 5, dst, stride, l, a ; vertical-right 8x8
+ movu m1, [aq-1] ; topleft + top
+ movhps m2, [lq] ; left[0..7] in the high half
+ movq m0, [aq] ; top[0..7]
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+ pavgb m0, m1 ; even rows seed: avg(top[x-1], top[x])
+ PALIGNR m1, m2, 9, m3
+ pslldq m2, m1, 1
+ pslldq m3, m1, 2
+ LOWPASS 1, 2, 3, 4 ; odd rows + left column: 3-tap filtered
+
+ ; ABCDEFGH <- for the following predictor:
+ ; IJKLMNOP
+ ; QABCDEFG | m0 contains ABCDEFGHxxxxxxxx
+ ; RIJKLMNO | m1 contains xxVUTSRQIJKLMNOP
+ ; SQABCDEF
+ ; TRIJKLMN
+ ; USQABCDE
+ ; VTRIJKLM
+
+%if cpuflag(ssse3)
+ punpcklqdq m0, m1 ; ABCDEFGHxxVUTSRQ
+%endif
+ movq [dstq+strideq*0], m0
+ movhps [dstq+strideq*1], m1
+%if cpuflag(ssse3)
+ pshufb m0, [pb_6xm1_BDF_0to6] ; xxxxxxUSQABCDEFG
+ pshufb m1, [pb_6xm1_246_8toE] ; xxxxxxVTRIJKLMNO
+%else
+ psrlw m2, m1, 8 ; x_U_S_Q_xxxxxxxx
+ pand m3, m1, [pw_255] ; x_V_T_R_xxxxxxxx
+ packuswb m3, m2 ; xVTRxxxxxUSQxxxx
+ pslldq m3, 4 ; xxxxxVTRxxxxxUSQ
+ PALIGNR m0, m3, 7, m4 ; xxxxxxUSQABCDEFG
+ psrldq m1, 8
+ pslldq m3, 8
+ PALIGNR m1, m3, 7, m4 ; xxxxxxVTRIJKLMNO
+%endif
+ movhps [dstq+strideq*2], m0
+ movhps [dstq+stride3q ], m1
+ lea dstq, [dstq+strideq*4]
+ pslldq m0, 1 ; shift in the next left pixel per row pair
+ pslldq m1, 1
+ movhps [dstq+strideq*0], m0
+ movhps [dstq+strideq*1], m1
+ pslldq m0, 1
+ pslldq m1, 1
+ movhps [dstq+strideq*2], m0
+ movhps [dstq+stride3q ], m1
+ RET
+
+cglobal vp9_ipred_vr_16x16, 4, 4, %1, dst, stride, l, a ; vertical-right 16x16
+ mova m0, [aq] ; top
+ movu m1, [aq-1] ; topleft + top[0..14]
+ mova m2, [lq] ; left
+ DEFINE_ARGS dst, stride, stride3, cnt
+ lea stride3q, [strideq*3]
+ PALIGNR m3, m1, m2, 15, m6
+ LOWPASS 3, 1, 0, 4 ; odd rows seed: 3-tap filtered
+ pavgb m0, m1 ; even rows seed: 2-tap averaged
+ PALIGNR m1, m2, 1, m6
+ pslldq m4, m2, 1
+ LOWPASS 1, 2, 4, 5 ; filtered left column
+%if cpuflag(ssse3)
+ pshufb m1, [pb_02468ACE_13579BDF] ; deinterleave even/odd left samples
+%else
+ psrlw m5, m1, 8
+ pand m1, [pw_255]
+ packuswb m1, m5
+%endif
+ mov cntd, 4
+
+.loop:
+ movlhps m2, m1
+ mova [dstq+strideq*0], m0
+ mova [dstq+strideq*1], m3
+ PALIGNR m4, m0, m1, 15, m6 ; shift in the next left pixel
+ PALIGNR m5, m3, m2, 15, m6
+ mova [dstq+strideq*2], m4
+ mova [dstq+stride3q ], m5
+ lea dstq, [dstq+strideq*4]
+ PALIGNR m0, m1, 14, m6
+ PALIGNR m3, m2, 14, m6
+ pslldq m1, 2
+ dec cntd
+ jg .loop
+ RET
+
+cglobal vp9_ipred_vr_32x32, 4, 4, 9, dst, stride, l, a ; vertical-right 32x32
+ mova m0, [aq]
+ mova m2, [aq+16]
+ movu m1, [aq-1]
+ PALIGNR m3, m2, m0, 15, m6
+ PALIGNR m4, m2, m0, 14, m6
+ LOWPASS 4, 3, 2, 5 ; odd rows seed, high half
+ pavgb m3, m2 ; even rows seed, high half
+ mova m2, [lq+16]
+ PALIGNR m5, m1, m2, 15, m6
+ LOWPASS 5, 1, 0, 6 ; odd rows seed, low half
+ pavgb m0, m1 ; even rows seed, low half
+ mova m6, [lq]
+%if ARCH_X86_64
+ SWAP 0, 8 ; park m0 in m8
+%else
+ mova [dstq], m0 ; x86-32: no spare reg, stash in dst (reloaded below, then overwritten by output)
+%endif
+ PALIGNR m1, m2, 1, m0
+ PALIGNR m7, m2, m6, 15, m0
+ LOWPASS 1, 2, 7, 0 ; filtered left, upper half
+ PALIGNR m2, m6, 1, m0
+ pslldq m7, m6, 1
+ LOWPASS 2, 6, 7, 0 ; filtered left, lower half
+%if cpuflag(ssse3)
+ pshufb m1, [pb_02468ACE_13579BDF] ; deinterleave even/odd left samples
+ pshufb m2, [pb_02468ACE_13579BDF]
+%else
+ psrlw m0, m1, 8
+ psrlw m6, m2, 8
+ pand m1, [pw_255]
+ pand m2, [pw_255]
+ packuswb m1, m0
+ packuswb m2, m6
+%endif
+ DEFINE_ARGS dst, stride, dst16, cnt
+ lea dst16q, [dstq +strideq*8]
+ lea dst16q, [dst16q+strideq*8] ; dst16 = row 16
+ SBUTTERFLY qdq, 2, 1, 6
+%if ARCH_X86_64
+ SWAP 0, 8
+%else
+ mova m0, [dstq] ; restore the stashed vector
+%endif
+ mov cntd, 8
+
+.loop:
+ ; even lines (0, 2, 4, ...): m1 | m0, m3
+ ; odd lines (1, 3, 5, ...): m2 | m5, m4
+%macro %%write 4 ; stride offset, left-sample feed, row low half, row high half
+ mova [dstq+stride%1+ 0], %3
+ mova [dstq+stride%1+16], %4
+ movhps [dst16q+stride%1 ], %2 ; row n+16 = row n advanced 8 pixels
+ movu [dst16q+stride%1+ 8], %3
+ movq [dst16q+stride%1+24], %4
+ PALIGNR %4, %3, 15, m6 ; shift in the next left pixel
+ PALIGNR %3, %2, 15, m6
+ pslldq %2, 1
+%endmacro
+
+ %%write q*0, m1, m0, m3
+ %%write q*1, m2, m5, m4
+ lea dstq, [dstq +strideq*2]
+ lea dst16q, [dst16q+strideq*2]
+ dec cntd
+ jg .loop
+ RET
+%endmacro
+
+INIT_XMM sse2
+VR_XMM_FUNCS 7
+INIT_XMM ssse3
+VR_XMM_FUNCS 6
+INIT_XMM avx
+VR_XMM_FUNCS 6
+
+; hd
+
+INIT_MMX mmxext
+cglobal vp9_ipred_hd_4x4, 4, 4, 0, dst, stride, l, a ; horizontal-down 4x4
+ movd m0, [lq] ; left[0..3]
+ punpckldq m0, [aq-1] ; + topleft, top[0..2]
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+ psrlq m1, m0, 8
+ psrlq m2, m1, 8
+ LOWPASS 2, 1, 0, 3 ; 3-tap filtered samples
+ pavgb m1, m0 ; 2-tap averaged samples
+
+ ; DHIJ <- for the following predictor:
+ ; CGDH
+ ; BFCG | m1 contains ABCDxxxx
+ ; AEBF | m2 contains EFGHIJxx
+
+ punpcklbw m1, m2 ; interleave avg/filter pairs
+ punpckhdq m0, m1, m2
+
+ ; m1 contains AEBFCGDH
+ ; m0 contains CGDHIJxx
+
+ movd [dstq+stride3q ], m1 ; bottom-up, advancing 2 samples per row
+ movd [dstq+strideq*1], m0
+ psrlq m1, 16
+ psrlq m0, 16
+ movd [dstq+strideq*2], m1
+ movd [dstq+strideq*0], m0
+ RET
+
+%macro HD_XMM_FUNCS 0 ; horizontal-down 8x8/16x16/32x32
+cglobal vp9_ipred_hd_8x8, 4, 4, 5, dst, stride, l, a
+ movq m0, [lq] ; left[0..7]
+ movhps m0, [aq-1] ; + topleft, top[0..6]
+ DEFINE_ARGS dst, stride, stride3, dst4
+ lea stride3q, [strideq*3]
+ lea dst4q, [dstq+strideq*4]
+ psrldq m1, m0, 1
+ psrldq m2, m1, 1
+ LOWPASS 2, 1, 0, 3 ; 3-tap filtered samples
+ pavgb m1, m0 ; 2-tap averaged samples
+
+ ; HPQRSTUV <- for the following predictor
+ ; GOHPQRST
+ ; FNGOHPQR | m1 contains ABCDEFGHxxxxxxxx
+ ; EMFNGOHP | m2 contains IJKLMNOPQRSTUVxx
+ ; DLEMFNGO
+ ; CKDLEMFN
+ ; BJCKDLEM
+ ; AIBJCKDL
+
+ punpcklbw m1, m2 ; interleave avg/filter pairs
+ movhlps m2, m2
+
+ ; m1 contains AIBJCKDLEMFNGOHP
+ ; m2 contains QRSTUVxxxxxxxxxx
+
+ movhps [dstq +stride3q ], m1 ; rows written bottom-up, advancing 2 samples per row
+ movq [dst4q+stride3q ], m1
+ PALIGNR m3, m2, m1, 2, m4
+ movhps [dstq +strideq*2], m3
+ movq [dst4q+strideq*2], m3
+ PALIGNR m3, m2, m1, 4, m4
+ movhps [dstq +strideq*1], m3
+ movq [dst4q+strideq*1], m3
+ PALIGNR m2, m1, 6, m4
+ movhps [dstq +strideq*0], m2
+ movq [dst4q+strideq*0], m2
+ RET
+
+cglobal vp9_ipred_hd_16x16, 4, 6, 7, dst, stride, l, a
+ mova m0, [lq] ; left[0..15]
+ movu m3, [aq-1] ; topleft + top[0..14]
+ DEFINE_ARGS dst, stride, stride4, dst4, dst8, dst12
+ lea stride4q, [strideq*4]
+ lea dst4q, [dstq +stride4q]
+ lea dst8q, [dst4q+stride4q]
+ lea dst12q, [dst8q+stride4q]
+ psrldq m4, m3, 1
+ psrldq m5, m3, 2
+ LOWPASS 5, 4, 3, 6 ; filtered top row
+ PALIGNR m1, m3, m0, 1, m6
+ PALIGNR m2, m3, m0, 2, m6
+ LOWPASS 2, 1, 0, 6 ; filtered left samples
+ pavgb m1, m0 ; averaged left samples
+ SBUTTERFLY bw, 1, 2, 6 ; interleave avg/filter pairs
+
+ ; NOTE(upstream): "I PROBABLY INVERTED L0 and L16 here" -- verify ordering of the left halves
+ ; live in loop: m1, m2, m5
+.loop:
+ sub stride4q, strideq ; also sets the flags consumed by jg below (MOV/SSE ops leave EFLAGS alone)
+ movhps [dstq +stride4q +0], m2
+ movq [dstq +stride4q +8], m5
+ mova [dst4q+stride4q ], m2
+ movhps [dst8q+stride4q +0], m1
+ movq [dst8q+stride4q +8], m2
+ mova [dst12q+stride4q ], m1
+%if cpuflag(avx)
+ palignr m1, m2, m1, 2 ; advance the sample chain by one row (2 samples)
+ palignr m2, m5, m2, 2
+%elif cpuflag(ssse3)
+ palignr m3, m2, m1, 2
+ palignr m0, m5, m2, 2
+ mova m1, m3
+ mova m2, m0
+%else
+ ; slightly modified version of PALIGNR
+ mova m6, m2
+ mova m4, m5
+ pslldq m6, 14
+ pslldq m4, 14
+ psrldq m1, 2
+ psrldq m2, 2
+ por m1, m6
+ por m2, m4
+%endif
+ psrldq m5, 2
+ jg .loop ; 4 iterations: stride4q counts 3s, 2s, 1s, 0
+ RET
+
+cglobal vp9_ipred_hd_32x32, 4, 6, 8, dst, stride, l, a
+ mova m0, [lq] ; left[0..15]
+ mova m1, [lq+16] ; left[16..31]
+ movu m2, [aq-1] ; topleft + top[0..14]
+ movu m3, [aq+15]
+ DEFINE_ARGS dst, stride, stride8, dst8, dst16, dst24
+ lea stride8q, [strideq*8]
+ lea dst8q, [dstq +stride8q]
+ lea dst16q, [dst8q +stride8q]
+ lea dst24q, [dst16q+stride8q]
+ psrldq m4, m3, 1
+ psrldq m5, m3, 2
+ LOWPASS 5, 4, 3, 6 ; filtered top, high half
+ PALIGNR m4, m3, m2, 2, m6
+ PALIGNR m3, m2, 1, m6
+ LOWPASS 4, 3, 2, 6 ; filtered top, low half
+ PALIGNR m3, m2, m1, 2, m6
+ PALIGNR m2, m1, 1, m6
+ LOWPASS 3, 2, 1, 6 ; filtered left, upper half
+ pavgb m2, m1
+ PALIGNR m6, m1, m0, 1, m7
+ PALIGNR m1, m0, 2, m7
+ LOWPASS 1, 6, 0, 7 ; filtered left, lower half
+ pavgb m0, m6
+ SBUTTERFLY bw, 2, 3, 6 ; interleave avg/filter pairs
+ SBUTTERFLY bw, 0, 1, 6
+
+ ; live in loop: m0..m5 (sample chain from bottom-left to top-right)
+.loop:
+ sub stride8q, strideq ; also sets the flags consumed by jg below
+ mova [dstq +stride8q+ 0], m3
+ mova [dstq +stride8q+16], m4
+ mova [dst8q +stride8q+ 0], m2
+ mova [dst8q +stride8q+16], m3
+ mova [dst16q+stride8q+ 0], m1
+ mova [dst16q+stride8q+16], m2
+ mova [dst24q+stride8q+ 0], m0
+ mova [dst24q+stride8q+16], m1
+%if cpuflag(avx)
+ palignr m0, m1, m0, 2 ; advance the whole chain by one row
+ palignr m1, m2, m1, 2
+ palignr m2, m3, m2, 2
+ palignr m3, m4, m3, 2
+ palignr m4, m5, m4, 2
+ psrldq m5, 2
+%elif cpuflag(ssse3)
+ psrldq m6, m5, 2
+ palignr m5, m4, 2
+ palignr m4, m3, 2
+ palignr m3, m2, 2
+ palignr m2, m1, 2
+ palignr m1, m0, 2
+ mova m0, m1
+ mova m1, m2
+ mova m2, m3
+ mova m3, m4
+ mova m4, m5
+ mova m5, m6
+%else
+ ; sort of a half-integrated version of PALIGNR
+ pslldq m7, m4, 14
+ pslldq m6, m5, 14
+ psrldq m4, 2
+ psrldq m5, 2
+ por m4, m6
+ pslldq m6, m3, 14
+ psrldq m3, 2
+ por m3, m7
+ pslldq m7, m2, 14
+ psrldq m2, 2
+ por m2, m6
+ pslldq m6, m1, 14
+ psrldq m1, 2
+ por m1, m7
+ psrldq m0, 2
+ por m0, m6
+%endif
+ jg .loop ; flags from the sub at loop top
+ RET
+%endmacro
+
+INIT_XMM sse2
+HD_XMM_FUNCS
+INIT_XMM ssse3
+HD_XMM_FUNCS
+INIT_XMM avx
+HD_XMM_FUNCS
+
+%macro HU_MMX_FUNCS 0 ; horizontal-up 4x4 (left-only)
+cglobal vp9_ipred_hu_4x4, 3, 3, 0, dst, stride, l
+ movd m0, [lq] ; left[0..3]
+%if cpuflag(ssse3)
+ pshufb m0, [pb_0to2_5x3] ; repeat last pixel
+%else
+ punpcklbw m1, m0, m0 ; 00112233
+ pshufw m1, m1, q3333 ; 33333333
+ punpckldq m0, m1 ; 01233333
+%endif
+ psrlq m1, m0, 8
+ psrlq m2, m1, 8
+ LOWPASS 2, 1, 0, 3 ; 3-tap filtered samples
+ pavgb m1, m0 ; 2-tap averaged samples
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+ SBUTTERFLY bw, 1, 2, 0 ; interleave avg/filter pairs
+ PALIGNR m2, m1, 2, m0 ; row n+1 = row n advanced 2 samples
+ movd [dstq+strideq*0], m1
+ movd [dstq+strideq*1], m2
+ punpckhdq m1, m1
+ punpckhdq m2, m2
+ movd [dstq+strideq*2], m1
+ movd [dstq+stride3q ], m2
+ RET
+%endmacro
+
+INIT_MMX mmxext
+HU_MMX_FUNCS
+INIT_MMX ssse3
+HU_MMX_FUNCS
+
+%macro HU_XMM_FUNCS 1 ; n_xmm_regs in hu_32x32
+cglobal vp9_ipred_hu_8x8, 3, 4, 4, dst, stride, l ; horizontal-up 8x8 (left-only)
+ movq m0, [lq] ; left[0..7]
+%if cpuflag(ssse3)
+ pshufb m0, [pb_0to6_9x7] ; repeat last pixel
+%else
+ punpcklbw m1, m0, m0 ; 0011223344556677
+ punpckhwd m1, m1 ; 4444555566667777
+ shufps m0, m1, q3310 ; 0123456777777777
+%endif
+ psrldq m1, m0, 1
+ psrldq m2, m1, 1
+ LOWPASS 2, 1, 0, 3 ; 3-tap filtered samples
+ pavgb m1, m0 ; 2-tap averaged samples
+ DEFINE_ARGS dst, stride, stride3, dst4
+ lea stride3q, [strideq*3]
+ lea dst4q, [dstq+strideq*4]
+ SBUTTERFLY bw, 1, 2, 0 ; interleave avg/filter pairs
+ movq [dstq +strideq*0], m1
+ movhps [dst4q+strideq*0], m1
+ PALIGNR m0, m2, m1, 2, m3 ; each row advances 2 samples
+ movq [dstq +strideq*1], m0
+ movhps [dst4q+strideq*1], m0
+ PALIGNR m0, m2, m1, 4, m3
+ movq [dstq +strideq*2], m0
+ movhps [dst4q+strideq*2], m0
+ PALIGNR m2, m1, 6, m3
+ movq [dstq +stride3q ], m2
+ movhps [dst4q+stride3q ], m2
+ RET
+
+cglobal vp9_ipred_hu_16x16, 3, 4, 5, dst, stride, l
+ mova m0, [lq] ; left[0..15]
+%if cpuflag(ssse3)
+ mova m3, [pb_2toE_3xF] ; shift-by-2, repeat-last mask
+ pshufb m1, m0, [pb_1toE_2xF]
+ pshufb m2, m0, m3
+%else
+ pand m3, m0, [pb_15x0_1xm1] ; keep only the last pixel
+ psrldq m1, m0, 1
+ por m1, m3
+ punpckhbw m3, m3
+ psrldq m2, m0, 2
+ por m2, m3
+%endif
+ LOWPASS 2, 1, 0, 4 ; 3-tap filtered
+ pavgb m1, m0 ; 2-tap averaged
+ DEFINE_ARGS dst, stride, stride9, cnt
+ lea stride9q, [strideq*8+strideq]
+ mov cntd, 4
+ SBUTTERFLY bw, 1, 2, 0 ; interleave pairs
+
+.loop:
+ mova [dstq+strideq*0], m1
+ mova [dstq+strideq*8], m2 ; row n+8 = row n advanced 16 samples
+ PALIGNR m0, m2, m1, 2, m4 ; advance one row (2 samples)
+%if cpuflag(ssse3)
+ pshufb m2, m3
+%else
+ psrldq m2, 2
+ por m2, m3
+%endif
+ mova [dstq+strideq*1], m0
+ mova [dstq+stride9q ], m2
+ PALIGNR m1, m2, m0, 2, m4
+%if cpuflag(ssse3)
+ pshufb m2, m3
+%else
+ psrldq m2, 2
+ por m2, m3
+%endif
+ lea dstq, [dstq+strideq*2]
+ dec cntd
+ jg .loop
+ RET
+
+cglobal vp9_ipred_hu_32x32, 3, 7, %1, dst, stride, l
+ mova m1, [lq] ; left[0..15]
+ mova m0, [lq+16] ; left[16..31]
+ PALIGNR m2, m0, m1, 1, m5
+ PALIGNR m3, m0, m1, 2, m5
+ LOWPASS 3, 2, 1, 5 ; filtered, lower half
+ pavgb m2, m1 ; averaged, lower half
+%if cpuflag(ssse3)
+ mova m4, [pb_2toE_3xF]
+ pshufb m5, m0, [pb_1toE_2xF]
+ pshufb m1, m0, m4
+%else
+ pand m4, m0, [pb_15x0_1xm1]
+ psrldq m5, m0, 1
+ por m5, m4
+ punpckhbw m4, m4
+ psrldq m1, m0, 2
+ por m1, m4
+%endif
+ LOWPASS 1, 5, 0, 6 ; filtered, upper half
+ pavgb m0, m5 ; averaged, upper half
+ DEFINE_ARGS dst, stride, cnt, stride0, dst8, dst16, dst24
+ mov cntd, 8
+ xor stride0q, stride0q ; running row offset, grows by stride each iteration
+ lea dst8q, [dstq +strideq*8]
+ lea dst16q, [dst8q +strideq*8]
+ lea dst24q, [dst16q+strideq*8]
+ SBUTTERFLY bw, 0, 1, 5 ; interleave avg/filter pairs
+ SBUTTERFLY bw, 2, 3, 5
+%if cpuflag(ssse3)
+ pshufb m6, m1, [pb_15] ; bottom fill value splatted
+%else
+ pshufhw m6, m4, q3333
+ punpckhqdq m6, m6
+%endif
+
+.loop:
+ mova [dstq +stride0q+ 0], m2
+ mova [dstq +stride0q+16], m3
+ mova [dst8q +stride0q+ 0], m3 ; row n+8 = row n advanced 16 samples
+ mova [dst8q +stride0q+16], m0
+ mova [dst16q+stride0q+ 0], m0
+ mova [dst16q+stride0q+16], m1
+ mova [dst24q+stride0q+ 0], m1
+ mova [dst24q+stride0q+16], m6
+%if cpuflag(avx)
+ palignr m2, m3, m2, 2 ; advance the chain one row (2 samples)
+ palignr m3, m0, m3, 2
+ palignr m0, m1, m0, 2
+ pshufb m1, m4
+%elif cpuflag(ssse3)
+ pshufb m5, m1, m4
+ palignr m1, m0, 2
+ palignr m0, m3, 2
+ palignr m3, m2, 2
+ mova m2, m3
+ mova m3, m0
+ mova m0, m1
+ mova m1, m5
+%else
+ ; half-integrated version of PALIGNR
+ pslldq m5, m1, 14
+ pslldq m7, m0, 14
+ psrldq m1, 2
+ psrldq m0, 2
+ por m1, m4
+ por m0, m5
+ pslldq m5, m3, 14
+ psrldq m3, 2
+ por m3, m7
+ psrldq m2, 2
+ por m2, m5
+%endif
+ add stride0q, strideq
+ dec cntd
+ jg .loop
+ RET
+%endmacro
+
+INIT_XMM sse2
+HU_XMM_FUNCS 8
+INIT_XMM ssse3
+HU_XMM_FUNCS 7
+INIT_XMM avx
+HU_XMM_FUNCS 7
+
+; FIXME 127, 128, 129 ?
diff --git a/media/ffvpx/libavcodec/x86/vp9intrapred_16bpp.asm b/media/ffvpx/libavcodec/x86/vp9intrapred_16bpp.asm
new file mode 100644
index 000000000..c0ac16d3e
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp9intrapred_16bpp.asm
@@ -0,0 +1,2135 @@
+;******************************************************************************
+;* VP9 Intra prediction SIMD optimizations
+;*
+;* Copyright (c) 2015 Ronald S. Bultje <rsbultje gmail com>
+;* Copyright (c) 2015 Henrik Gramner <henrik gramner com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA 32
+
+pd_2: times 8 dd 2
+pd_4: times 8 dd 4
+pd_8: times 8 dd 8
+
+pb_2to15_14_15: db 2,3,4,5,6,7,8,9,10,11,12,13,14,15,14,15
+pb_4_5_8to13_8x0: db 4,5,8,9,10,11,12,13,0,0,0,0,0,0,0,0
+pb_0to7_67x4: db 0,1,2,3,4,5,6,7,6,7,6,7,6,7,6,7
+
+cextern pw_1
+cextern pw_1023
+cextern pw_4095
+cextern pd_16
+cextern pd_32
+cextern pd_65535;
+
+; FIXME most top-only functions (ddl, vl, v, dc_top) can be modified to take
+; only 3 registers on x86-32, which would make it one cycle faster, but that
+; would make the code quite a bit uglier...
+
+SECTION .text
+
+; Spill register m%1: on x86-64 swap it into spare register m%2, on x86-32
+; (too few xmm regs) store it to stack slot %3. Optional %4 names the new
+; location as reg_%4 for later operand use.
+%macro SCRATCH 3-4
+%if ARCH_X86_64
+ SWAP %1, %2
+%if %0 == 4
+%define reg_%4 m%2
+%endif
+%else
+ mova [%3], m%1
+%if %0 == 4
+%define reg_%4 [%3]
+%endif
+%endif
+%endmacro
+
+; Inverse of SCRATCH: restore m%1 from m%2 (x86-64) or from stack slot %3
+; (x86-32), and drop the optional reg_%4 alias.
+%macro UNSCRATCH 3-4
+%if ARCH_X86_64
+ SWAP %1, %2
+%else
+ mova m%1, [%3]
+%endif
+%if %0 == 4
+%undef reg_%4
+%endif
+%endmacro
+
+; Preload a memory constant %2 into register m%1 on x86-64 (registers are
+; plentiful); on x86-32 keep it as a memory operand. Optional %3 names it
+; reg_%3 either way.
+%macro PRELOAD 2-3
+%if ARCH_X86_64
+ mova m%1, [%2]
+%if %0 == 3
+%define reg_%3 m%1
+%endif
+%elif %0 == 3
+%define reg_%3 [%2]
+%endif
+%endmacro
+
+INIT_MMX mmx
+; 4x4 vertical prediction, 16bpp: copy the 4-pixel above row into all 4 rows.
+cglobal vp9_ipred_v_4x4_16, 2, 4, 1, dst, stride, l, a
+ movifnidn aq, amp
+ mova m0, [aq]
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+ mova [dstq+strideq*0], m0
+ mova [dstq+strideq*1], m0
+ mova [dstq+strideq*2], m0
+ mova [dstq+stride3q ], m0
+ RET
+
+INIT_XMM sse
+; 8x8 vertical prediction, 16bpp: copy the 8-pixel above row into all 8 rows.
+cglobal vp9_ipred_v_8x8_16, 2, 4, 1, dst, stride, l, a
+ movifnidn aq, amp
+ mova m0, [aq]
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+ mova [dstq+strideq*0], m0
+ mova [dstq+strideq*1], m0
+ mova [dstq+strideq*2], m0
+ mova [dstq+stride3q ], m0
+ lea dstq, [dstq+strideq*4]
+ mova [dstq+strideq*0], m0
+ mova [dstq+strideq*1], m0
+ mova [dstq+strideq*2], m0
+ mova [dstq+stride3q ], m0
+ RET
+
+INIT_XMM sse
+; 16x16 vertical prediction, 16bpp: the above row spans two xmm registers;
+; store 4 rows per loop iteration, 4 iterations.
+cglobal vp9_ipred_v_16x16_16, 2, 4, 2, dst, stride, l, a
+ movifnidn aq, amp
+ mova m0, [aq]
+ mova m1, [aq+mmsize]
+ DEFINE_ARGS dst, stride, stride3, cnt
+ lea stride3q, [strideq*3]
+ mov cntd, 4
+.loop:
+ mova [dstq+strideq*0+ 0], m0
+ mova [dstq+strideq*0+16], m1
+ mova [dstq+strideq*1+ 0], m0
+ mova [dstq+strideq*1+16], m1
+ mova [dstq+strideq*2+ 0], m0
+ mova [dstq+strideq*2+16], m1
+ mova [dstq+stride3q + 0], m0
+ mova [dstq+stride3q +16], m1
+ lea dstq, [dstq+strideq*4]
+ dec cntd
+ jg .loop
+ RET
+
+INIT_XMM sse
+; 32x32 vertical prediction, 16bpp: the above row spans four xmm registers;
+; store 2 rows per loop iteration, 16 iterations.
+cglobal vp9_ipred_v_32x32_16, 2, 4, 4, dst, stride, l, a
+ movifnidn aq, amp
+ mova m0, [aq+mmsize*0]
+ mova m1, [aq+mmsize*1]
+ mova m2, [aq+mmsize*2]
+ mova m3, [aq+mmsize*3]
+ DEFINE_ARGS dst, stride, cnt
+ mov cntd, 16
+.loop:
+ mova [dstq+strideq*0+ 0], m0
+ mova [dstq+strideq*0+16], m1
+ mova [dstq+strideq*0+32], m2
+ mova [dstq+strideq*0+48], m3
+ mova [dstq+strideq*1+ 0], m0
+ mova [dstq+strideq*1+16], m1
+ mova [dstq+strideq*1+32], m2
+ mova [dstq+strideq*1+48], m3
+ lea dstq, [dstq+strideq*2]
+ dec cntd
+ jg .loop
+ RET
+
+INIT_MMX mmxext
+; 4x4 horizontal prediction, 16bpp: splat each of the 4 left-edge pixels
+; across its row (left is stored bottom-to-top, hence q3333 first).
+cglobal vp9_ipred_h_4x4_16, 3, 3, 4, dst, stride, l, a
+ mova m3, [lq]
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+ pshufw m0, m3, q3333
+ pshufw m1, m3, q2222
+ pshufw m2, m3, q1111
+ pshufw m3, m3, q0000
+ mova [dstq+strideq*0], m0
+ mova [dstq+strideq*1], m1
+ mova [dstq+strideq*2], m2
+ mova [dstq+stride3q ], m3
+ RET
+
+INIT_XMM sse2
+; 8x8 horizontal prediction, 16bpp: punpck?wd doubles each left pixel to a
+; dword so pshufd can splat it across a full row.
+cglobal vp9_ipred_h_8x8_16, 3, 3, 4, dst, stride, l, a
+ mova m2, [lq]
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+ punpckhwd m3, m2, m2
+ pshufd m0, m3, q3333
+ pshufd m1, m3, q2222
+ mova [dstq+strideq*0], m0
+ mova [dstq+strideq*1], m1
+ pshufd m0, m3, q1111
+ pshufd m1, m3, q0000
+ mova [dstq+strideq*2], m0
+ mova [dstq+stride3q ], m1
+ lea dstq, [dstq+strideq*4]
+ punpcklwd m2, m2
+ pshufd m0, m2, q3333
+ pshufd m1, m2, q2222
+ mova [dstq+strideq*0], m0
+ mova [dstq+strideq*1], m1
+ pshufd m0, m2, q1111
+ pshufd m1, m2, q0000
+ mova [dstq+strideq*2], m0
+ mova [dstq+stride3q ], m1
+ RET
+
+INIT_XMM sse2
+; 16x16 horizontal prediction, 16bpp: process 4 left pixels (one qword) per
+; iteration, counting cnt down from 3 to 0 so rows are written in the right
+; order for bottom-to-top left data.
+cglobal vp9_ipred_h_16x16_16, 3, 5, 4, dst, stride, l, stride3, cnt
+ mov cntd, 3
+ lea stride3q, [strideq*3]
+.loop:
+ movh m3, [lq+cntq*8]
+ punpcklwd m3, m3
+ pshufd m0, m3, q3333
+ pshufd m1, m3, q2222
+ pshufd m2, m3, q1111
+ pshufd m3, m3, q0000
+ mova [dstq+strideq*0+ 0], m0
+ mova [dstq+strideq*0+16], m0
+ mova [dstq+strideq*1+ 0], m1
+ mova [dstq+strideq*1+16], m1
+ mova [dstq+strideq*2+ 0], m2
+ mova [dstq+strideq*2+16], m2
+ mova [dstq+stride3q + 0], m3
+ mova [dstq+stride3q +16], m3
+ lea dstq, [dstq+strideq*4]
+ dec cntd
+ jge .loop
+ RET
+
+INIT_XMM sse2
+; 32x32 horizontal prediction, 16bpp: same scheme as 16x16 but each row is
+; four stores wide and cnt runs 7..0 (8 qwords of left pixels).
+cglobal vp9_ipred_h_32x32_16, 3, 5, 4, dst, stride, l, stride3, cnt
+ mov cntd, 7
+ lea stride3q, [strideq*3]
+.loop:
+ movh m3, [lq+cntq*8]
+ punpcklwd m3, m3
+ pshufd m0, m3, q3333
+ pshufd m1, m3, q2222
+ pshufd m2, m3, q1111
+ pshufd m3, m3, q0000
+ mova [dstq+strideq*0+ 0], m0
+ mova [dstq+strideq*0+16], m0
+ mova [dstq+strideq*0+32], m0
+ mova [dstq+strideq*0+48], m0
+ mova [dstq+strideq*1+ 0], m1
+ mova [dstq+strideq*1+16], m1
+ mova [dstq+strideq*1+32], m1
+ mova [dstq+strideq*1+48], m1
+ mova [dstq+strideq*2+ 0], m2
+ mova [dstq+strideq*2+16], m2
+ mova [dstq+strideq*2+32], m2
+ mova [dstq+strideq*2+48], m2
+ mova [dstq+stride3q + 0], m3
+ mova [dstq+stride3q +16], m3
+ mova [dstq+stride3q +32], m3
+ mova [dstq+stride3q +48], m3
+ lea dstq, [dstq+strideq*4]
+ dec cntd
+ jge .loop
+ RET
+
+INIT_MMX mmxext
+; 4x4 DC prediction, 16bpp: dc = (sum of 4 left + 4 above + 4) >> 3,
+; splatted over the whole block. pmaddwd by pw_1 does pairwise word sums.
+cglobal vp9_ipred_dc_4x4_16, 4, 4, 2, dst, stride, l, a
+ mova m0, [lq]
+ paddw m0, [aq]
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+ pmaddwd m0, [pw_1]
+ pshufw m1, m0, q3232
+ paddd m0, [pd_4]
+ paddd m0, m1
+ psrad m0, 3
+ pshufw m0, m0, q0000
+ mova [dstq+strideq*0], m0
+ mova [dstq+strideq*1], m0
+ mova [dstq+strideq*2], m0
+ mova [dstq+stride3q ], m0
+ RET
+
+INIT_XMM sse2
+; 8x8 DC prediction, 16bpp: dc = (sum of 16 edge pixels + 8) >> 4, splatted.
+cglobal vp9_ipred_dc_8x8_16, 4, 4, 2, dst, stride, l, a
+ mova m0, [lq]
+ paddw m0, [aq]
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+ pmaddwd m0, [pw_1]
+ ; horizontal dword reduction: high qword + low qword, then cross-dword add
+ pshufd m1, m0, q3232
+ paddd m0, m1
+ pshufd m1, m0, q1111
+ paddd m0, [pd_8]
+ paddd m0, m1
+ psrad m0, 4
+ pshuflw m0, m0, q0000
+ punpcklqdq m0, m0
+ mova [dstq+strideq*0], m0
+ mova [dstq+strideq*1], m0
+ mova [dstq+strideq*2], m0
+ mova [dstq+stride3q ], m0
+ lea dstq, [dstq+strideq*4]
+ mova [dstq+strideq*0], m0
+ mova [dstq+strideq*1], m0
+ mova [dstq+strideq*2], m0
+ mova [dstq+stride3q ], m0
+ RET
+
+INIT_XMM sse2
+; 16x16 DC prediction, 16bpp: dc = (sum of 32 edge pixels + 16) >> 5,
+; splatted; 4 rows per iteration, 4 iterations.
+cglobal vp9_ipred_dc_16x16_16, 4, 4, 2, dst, stride, l, a
+ mova m0, [lq]
+ paddw m0, [lq+mmsize]
+ paddw m0, [aq]
+ paddw m0, [aq+mmsize]
+ DEFINE_ARGS dst, stride, stride3, cnt
+ lea stride3q, [strideq*3]
+ mov cntd, 4
+ pmaddwd m0, [pw_1]
+ pshufd m1, m0, q3232
+ paddd m0, m1
+ pshufd m1, m0, q1111
+ paddd m0, [pd_16]
+ paddd m0, m1
+ psrad m0, 5
+ pshuflw m0, m0, q0000
+ punpcklqdq m0, m0
+.loop:
+ mova [dstq+strideq*0+ 0], m0
+ mova [dstq+strideq*0+16], m0
+ mova [dstq+strideq*1+ 0], m0
+ mova [dstq+strideq*1+16], m0
+ mova [dstq+strideq*2+ 0], m0
+ mova [dstq+strideq*2+16], m0
+ mova [dstq+stride3q + 0], m0
+ mova [dstq+stride3q +16], m0
+ lea dstq, [dstq+strideq*4]
+ dec cntd
+ jg .loop
+ RET
+
+INIT_XMM sse2
+; 32x32 DC prediction, 16bpp: dc = (sum of 64 edge pixels + 32) >> 6,
+; splatted; 2 rows per iteration, 16 iterations.
+cglobal vp9_ipred_dc_32x32_16, 4, 4, 2, dst, stride, l, a
+ mova m0, [lq+mmsize*0]
+ paddw m0, [lq+mmsize*1]
+ paddw m0, [lq+mmsize*2]
+ paddw m0, [lq+mmsize*3]
+ paddw m0, [aq+mmsize*0]
+ paddw m0, [aq+mmsize*1]
+ paddw m0, [aq+mmsize*2]
+ paddw m0, [aq+mmsize*3]
+ DEFINE_ARGS dst, stride, stride3, cnt
+ lea stride3q, [strideq*3]
+ mov cntd, 16
+ pmaddwd m0, [pw_1]
+ pshufd m1, m0, q3232
+ paddd m0, m1
+ pshufd m1, m0, q1111
+ paddd m0, [pd_32]
+ paddd m0, m1
+ psrad m0, 6
+ pshuflw m0, m0, q0000
+ punpcklqdq m0, m0
+.loop:
+ mova [dstq+strideq*0+ 0], m0
+ mova [dstq+strideq*0+16], m0
+ mova [dstq+strideq*0+32], m0
+ mova [dstq+strideq*0+48], m0
+ mova [dstq+strideq*1+ 0], m0
+ mova [dstq+strideq*1+16], m0
+ mova [dstq+strideq*1+32], m0
+ mova [dstq+strideq*1+48], m0
+ lea dstq, [dstq+strideq*2]
+ dec cntd
+ jg .loop
+ RET
+
+; Single-edge DC predictors (dc_top / dc_left), 16bpp. %1 is the name suffix
+; ("top"/"left"), %2 the edge pointer (aq/lq). Same structure as the full
+; dc_* functions above, but summing only N edge pixels, so the rounding
+; constant and shift are halved relative to the two-edge versions.
+%macro DC_1D_FNS 2
+INIT_MMX mmxext
+; 4x4: dc = (sum of 4 edge pixels + 2) >> 2
+cglobal vp9_ipred_dc_%1_4x4_16, 4, 4, 2, dst, stride, l, a
+ mova m0, [%2]
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+ pmaddwd m0, [pw_1]
+ pshufw m1, m0, q3232
+ paddd m0, [pd_2]
+ paddd m0, m1
+ psrad m0, 2
+ pshufw m0, m0, q0000
+ mova [dstq+strideq*0], m0
+ mova [dstq+strideq*1], m0
+ mova [dstq+strideq*2], m0
+ mova [dstq+stride3q ], m0
+ RET
+
+INIT_XMM sse2
+; 8x8: dc = (sum of 8 edge pixels + 4) >> 3
+cglobal vp9_ipred_dc_%1_8x8_16, 4, 4, 2, dst, stride, l, a
+ mova m0, [%2]
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+ pmaddwd m0, [pw_1]
+ pshufd m1, m0, q3232
+ paddd m0, m1
+ pshufd m1, m0, q1111
+ paddd m0, [pd_4]
+ paddd m0, m1
+ psrad m0, 3
+ pshuflw m0, m0, q0000
+ punpcklqdq m0, m0
+ mova [dstq+strideq*0], m0
+ mova [dstq+strideq*1], m0
+ mova [dstq+strideq*2], m0
+ mova [dstq+stride3q ], m0
+ lea dstq, [dstq+strideq*4]
+ mova [dstq+strideq*0], m0
+ mova [dstq+strideq*1], m0
+ mova [dstq+strideq*2], m0
+ mova [dstq+stride3q ], m0
+ RET
+
+INIT_XMM sse2
+; 16x16: dc = (sum of 16 edge pixels + 8) >> 4
+cglobal vp9_ipred_dc_%1_16x16_16, 4, 4, 2, dst, stride, l, a
+ mova m0, [%2]
+ paddw m0, [%2+mmsize]
+ DEFINE_ARGS dst, stride, stride3, cnt
+ lea stride3q, [strideq*3]
+ mov cntd, 4
+ pmaddwd m0, [pw_1]
+ pshufd m1, m0, q3232
+ paddd m0, m1
+ pshufd m1, m0, q1111
+ paddd m0, [pd_8]
+ paddd m0, m1
+ psrad m0, 4
+ pshuflw m0, m0, q0000
+ punpcklqdq m0, m0
+.loop:
+ mova [dstq+strideq*0+ 0], m0
+ mova [dstq+strideq*0+16], m0
+ mova [dstq+strideq*1+ 0], m0
+ mova [dstq+strideq*1+16], m0
+ mova [dstq+strideq*2+ 0], m0
+ mova [dstq+strideq*2+16], m0
+ mova [dstq+stride3q + 0], m0
+ mova [dstq+stride3q +16], m0
+ lea dstq, [dstq+strideq*4]
+ dec cntd
+ jg .loop
+ RET
+
+INIT_XMM sse2
+; 32x32: dc = (sum of 32 edge pixels + 16) >> 5
+cglobal vp9_ipred_dc_%1_32x32_16, 4, 4, 2, dst, stride, l, a
+ mova m0, [%2+mmsize*0]
+ paddw m0, [%2+mmsize*1]
+ paddw m0, [%2+mmsize*2]
+ paddw m0, [%2+mmsize*3]
+ DEFINE_ARGS dst, stride, cnt
+ mov cntd, 16
+ pmaddwd m0, [pw_1]
+ pshufd m1, m0, q3232
+ paddd m0, m1
+ pshufd m1, m0, q1111
+ paddd m0, [pd_16]
+ paddd m0, m1
+ psrad m0, 5
+ pshuflw m0, m0, q0000
+ punpcklqdq m0, m0
+.loop:
+ mova [dstq+strideq*0+ 0], m0
+ mova [dstq+strideq*0+16], m0
+ mova [dstq+strideq*0+32], m0
+ mova [dstq+strideq*0+48], m0
+ mova [dstq+strideq*1+ 0], m0
+ mova [dstq+strideq*1+16], m0
+ mova [dstq+strideq*1+32], m0
+ mova [dstq+strideq*1+48], m0
+ lea dstq, [dstq+strideq*2]
+ dec cntd
+ jg .loop
+ RET
+%endmacro
+
+DC_1D_FNS top, aq
+DC_1D_FNS left, lq
+
+INIT_MMX mmxext
+; 4x4 TrueMotion prediction, 10-bit: pred = clip(left + above - topleft,
+; 0, 1023). The 12-bit variant reuses the same body with a 4095 clip bound.
+cglobal vp9_ipred_tm_4x4_10, 4, 4, 6, dst, stride, l, a
+ mova m5, [pw_1023]
+.body:
+ mova m4, [aq]
+ mova m3, [lq]
+ ; splat the top-left pixel (word at aq-2, selected by q1111 from [aq-4])
+ movd m0, [aq-4]
+ pshufw m0, m0, q1111
+ psubw m4, m0 ; m4 = above - topleft
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+ pshufw m0, m3, q3333
+ pshufw m1, m3, q2222
+ pshufw m2, m3, q1111
+ pshufw m3, m3, q0000
+ paddw m0, m4
+ paddw m1, m4
+ paddw m2, m4
+ paddw m3, m4
+ ; clamp to [0, pixel_max]
+ pxor m4, m4
+ pmaxsw m0, m4
+ pmaxsw m1, m4
+ pmaxsw m2, m4
+ pmaxsw m3, m4
+ pminsw m0, m5
+ pminsw m1, m5
+ pminsw m2, m5
+ pminsw m3, m5
+ mova [dstq+strideq*0], m0
+ mova [dstq+strideq*1], m1
+ mova [dstq+strideq*2], m2
+ mova [dstq+stride3q ], m3
+ RET
+
+; 12-bit entry point: only the clip maximum differs
+cglobal vp9_ipred_tm_4x4_12, 4, 4, 6, dst, stride, l, a
+ mova m5, [pw_4095]
+ jmp mangle(private_prefix %+ _ %+ vp9_ipred_tm_4x4_10 %+ SUFFIX).body
+
+INIT_XMM sse2
+; 8x8 TrueMotion prediction, 10/12-bit: pred = clip(left + above - topleft);
+; processes 4 rows per iteration (cnt = 1..0).
+cglobal vp9_ipred_tm_8x8_10, 4, 5, 7, dst, stride, l, a
+ mova m4, [pw_1023]
+.body:
+ pxor m6, m6
+ mova m5, [aq]
+ ; splat topleft (word at aq-2) and pre-subtract it from the above row
+ movd m0, [aq-4]
+ pshuflw m0, m0, q1111
+ punpcklqdq m0, m0
+ psubw m5, m0
+ DEFINE_ARGS dst, stride, l, stride3, cnt
+ lea stride3q, [strideq*3]
+ mov cntd, 1
+.loop:
+ movh m3, [lq+cntq*8]
+ punpcklwd m3, m3
+ pshufd m0, m3, q3333
+ pshufd m1, m3, q2222
+ pshufd m2, m3, q1111
+ pshufd m3, m3, q0000
+ paddw m0, m5
+ paddw m1, m5
+ paddw m2, m5
+ paddw m3, m5
+ pmaxsw m0, m6
+ pmaxsw m1, m6
+ pmaxsw m2, m6
+ pmaxsw m3, m6
+ pminsw m0, m4
+ pminsw m1, m4
+ pminsw m2, m4
+ pminsw m3, m4
+ mova [dstq+strideq*0], m0
+ mova [dstq+strideq*1], m1
+ mova [dstq+strideq*2], m2
+ mova [dstq+stride3q ], m3
+ lea dstq, [dstq+strideq*4]
+ dec cntd
+ jge .loop
+ RET
+
+; 12-bit entry point: only the clip maximum differs
+cglobal vp9_ipred_tm_8x8_12, 4, 5, 7, dst, stride, l, a
+ mova m4, [pw_4095]
+ jmp mangle(private_prefix %+ _ %+ vp9_ipred_tm_8x8_10 %+ SUFFIX).body
+
+INIT_XMM sse2
+; 16x16 TrueMotion prediction, 10/12-bit: above row held in m4/m5 with
+; topleft pre-subtracted; 2 rows per iteration (cnt = 7..0).
+cglobal vp9_ipred_tm_16x16_10, 4, 4, 8, dst, stride, l, a
+ mova m7, [pw_1023]
+.body:
+ pxor m6, m6
+ mova m4, [aq]
+ mova m5, [aq+mmsize]
+ movd m0, [aq-4]
+ pshuflw m0, m0, q1111
+ punpcklqdq m0, m0
+ psubw m4, m0
+ psubw m5, m0
+ DEFINE_ARGS dst, stride, l, cnt
+ mov cntd, 7
+.loop:
+ ; two left pixels per iteration, splatted to full rows
+ movd m3, [lq+cntq*4]
+ punpcklwd m3, m3
+ pshufd m2, m3, q1111
+ pshufd m3, m3, q0000
+ paddw m0, m2, m4
+ paddw m2, m5
+ paddw m1, m3, m4
+ paddw m3, m5
+ pmaxsw m0, m6
+ pmaxsw m2, m6
+ pmaxsw m1, m6
+ pmaxsw m3, m6
+ pminsw m0, m7
+ pminsw m2, m7
+ pminsw m1, m7
+ pminsw m3, m7
+ mova [dstq+strideq*0+ 0], m0
+ mova [dstq+strideq*0+16], m2
+ mova [dstq+strideq*1+ 0], m1
+ mova [dstq+strideq*1+16], m3
+ lea dstq, [dstq+strideq*2]
+ dec cntd
+ jge .loop
+ RET
+
+; 12-bit entry point: only the clip maximum differs
+cglobal vp9_ipred_tm_16x16_12, 4, 4, 8, dst, stride, l, a
+ mova m7, [pw_4095]
+ jmp mangle(private_prefix %+ _ %+ vp9_ipred_tm_16x16_10 %+ SUFFIX).body
+
+INIT_XMM sse2
+; 32x32 TrueMotion prediction, 10/12-bit: one row (4 stores) per iteration,
+; cnt = 31..0. On x86-64 the clip min/max vectors live in m9/m8; on x86-32
+; they are spilled to the 32-byte stack buffer declared in the prototype.
+cglobal vp9_ipred_tm_32x32_10, 4, 4, 10, 32 * -ARCH_X86_32, dst, stride, l, a
+ mova m0, [pw_1023]
+.body:
+ pxor m1, m1
+%if ARCH_X86_64
+ SWAP 0, 8
+ SWAP 1, 9
+%define reg_min m9
+%define reg_max m8
+%else
+ mova [rsp+ 0], m0
+ mova [rsp+16], m1
+%define reg_min [rsp+16]
+%define reg_max [rsp+ 0]
+%endif
+
+ mova m4, [aq+mmsize*0]
+ mova m5, [aq+mmsize*1]
+ mova m6, [aq+mmsize*2]
+ mova m7, [aq+mmsize*3]
+ ; splat topleft (word at aq-2) and pre-subtract from the above row
+ movd m0, [aq-4]
+ pshuflw m0, m0, q1111
+ punpcklqdq m0, m0
+ psubw m4, m0
+ psubw m5, m0
+ psubw m6, m0
+ psubw m7, m0
+ DEFINE_ARGS dst, stride, l, cnt
+ mov cntd, 31
+.loop:
+ pinsrw m3, [lq+cntq*2], 0
+ punpcklwd m3, m3
+ pshufd m3, m3, q0000
+ paddw m0, m3, m4
+ paddw m1, m3, m5
+ paddw m2, m3, m6
+ paddw m3, m7
+ pmaxsw m0, reg_min
+ pmaxsw m1, reg_min
+ pmaxsw m2, reg_min
+ pmaxsw m3, reg_min
+ pminsw m0, reg_max
+ pminsw m1, reg_max
+ pminsw m2, reg_max
+ pminsw m3, reg_max
+ mova [dstq+strideq*0+ 0], m0
+ mova [dstq+strideq*0+16], m1
+ mova [dstq+strideq*0+32], m2
+ mova [dstq+strideq*0+48], m3
+ add dstq, strideq
+ dec cntd
+ jge .loop
+ RET
+
+; 12-bit entry point: only the clip maximum differs
+cglobal vp9_ipred_tm_32x32_12, 4, 4, 10, 32 * -ARCH_X86_32, dst, stride, l, a
+ mova m0, [pw_4095]
+ jmp mangle(private_prefix %+ _ %+ vp9_ipred_tm_32x32_10 %+ SUFFIX).body
+
+; Directional intra prediction functions
+;
+; in the functions below, 'abcdefgh' refers to above data (sometimes simply
+; abbreviated as a[N-M]). 'stuvwxyz' refers to left data (sometimes simply
+; abbreviated as l[N-M]). * is top-left data. ABCDEFG or A[N-M] is filtered
+; above data, STUVWXYZ or L[N-M] is filtered left data, and # is filtered
+; top-left data.
+
+; left=(left+2*center+right+2)>>2
+%macro LOWPASS 3 ; left [dst], center, right
+ ; (l+r)>>1, then pavgw with center adds the +1 rounding, giving
+ ; (l+2c+r+2)>>2 overall (word/16bpp variant)
+ paddw m%1, m%3
+ psraw m%1, 1
+ pavgw m%1, m%2
+%endmacro
+
+; abcdefgh (src) -> bcdefghh (dst)
+; dst/src can be the same register
+; Shift one word (pixel) left-in-memory, duplicating the last pixel; on ssse3
+; the caller may pass a preloaded pb_2to15_14_15 register to avoid a reload.
+%macro SHIFT_RIGHT 2-3 [pb_2to15_14_15] ; dst, src, [ssse3_shift_reg]
+%if cpuflag(ssse3)
+ pshufb %1, %2, %3 ; abcdefgh -> bcdefghh
+%else
+ psrldq %1, %2, 2 ; abcdefgh -> bcdefgh.
+ pshufhw %1, %1, q2210 ; bcdefgh. -> bcdefghh (re-duplicate last word)
+%endif
+%endmacro
+
+; abcdefgh (src) -> bcdefghh (dst1) and cdefghhh (dst2)
+; Double SHIFT_RIGHT: produce both the by-1 and by-2 shifted vectors,
+; padding with the last pixel.
+%macro SHIFT_RIGHTx2 3-4 [pb_2to15_14_15] ; dst1, dst2, src, [ssse3_shift_reg]
+%if cpuflag(ssse3)
+ pshufb %1, %3, %4 ; abcdefgh -> bcdefghh
+ pshufb %2, %1, %4 ; bcdefghh -> cdefghhh
+%else
+ psrldq %1, %3, 2 ; abcdefgh -> bcdefgh.
+ psrldq %2, %3, 4 ; abcdefgh -> cdefgh..
+ pshufhw %1, %1, q2210 ; bcdefgh. -> bcdefghh
+ pshufhw %2, %2, q1110 ; cdefgh.. -> cdefghhh
+%endif
+%endmacro
+
+; Down-left prediction, 16bpp, all block sizes. Only the above row is used:
+; the filtered diagonal (LOWPASS of a[n-1],a[n],a[n+1]) is computed once and
+; each output row is the previous one shifted left by one pixel, padded with
+; the last above pixel. Instantiated below for sse2/ssse3/avx.
+%macro DL_FUNCS 0
+cglobal vp9_ipred_dl_4x4_16, 2, 4, 3, dst, stride, l, a
+ movifnidn aq, amp
+ movu m1, [aq] ; abcdefgh
+ pshufhw m0, m1, q3310 ; abcdefhh
+ SHIFT_RIGHT m1, m1 ; bcdefghh
+ psrldq m2, m1, 2 ; cdefghh.
+ LOWPASS 0, 1, 2 ; BCDEFGh.
+ pshufd m1, m0, q3321 ; DEFGh...
+ movh [dstq+strideq*0], m0
+ movh [dstq+strideq*2], m1
+ add dstq, strideq
+ psrldq m0, 2 ; CDEFGh..
+ psrldq m1, 2 ; EFGh....
+ movh [dstq+strideq*0], m0
+ movh [dstq+strideq*2], m1
+ RET
+
+cglobal vp9_ipred_dl_8x8_16, 2, 4, 5, dst, stride, l, a
+ movifnidn aq, amp
+ mova m0, [aq] ; abcdefgh
+%if cpuflag(ssse3)
+ mova m4, [pb_2to15_14_15]
+%endif
+ SHIFT_RIGHTx2 m1, m2, m0, m4 ; bcdefghh/cdefghhh
+ LOWPASS 0, 1, 2 ; BCDEFGHh
+ shufps m1, m0, m2, q3332 ; FGHhhhhh
+ shufps m3, m0, m1, q2121 ; DEFGHhhh
+ DEFINE_ARGS dst, stride, stride5
+ lea stride5q, [strideq*5]
+
+ ; rows are written in pairs 4 lines apart (stride, stride5) to reuse
+ ; the shifted vectors
+ mova [dstq+strideq*0], m0
+ mova [dstq+strideq*4], m1
+ SHIFT_RIGHT m0, m0, m4 ; CDEFGHhh
+ pshuflw m1, m1, q3321 ; GHhhhhhh
+ pshufd m2, m0, q3321 ; EFGHhhhh
+ mova [dstq+strideq*1], m0
+ mova [dstq+stride5q ], m1
+ lea dstq, [dstq+strideq*2]
+ pshuflw m1, m1, q3321 ; Hhhhhhhh
+ mova [dstq+strideq*0], m3
+ mova [dstq+strideq*4], m1
+ pshuflw m1, m1, q3321 ; hhhhhhhh
+ mova [dstq+strideq*1], m2
+ mova [dstq+stride5q ], m1
+ RET
+
+cglobal vp9_ipred_dl_16x16_16, 2, 4, 5, dst, stride, l, a
+ movifnidn aq, amp
+ mova m0, [aq] ; abcdefgh
+ mova m3, [aq+mmsize] ; ijklmnop
+ PALIGNR m1, m3, m0, 2, m4 ; bcdefghi
+ PALIGNR m2, m3, m0, 4, m4 ; cdefghij
+ LOWPASS 0, 1, 2 ; BCDEFGHI
+%if cpuflag(ssse3)
+ mova m4, [pb_2to15_14_15]
+%endif
+ SHIFT_RIGHTx2 m2, m1, m3, m4 ; jklmnopp/klmnoppp
+ LOWPASS 1, 2, 3 ; JKLMNOPp
+ pshufd m2, m2, q3333 ; pppppppp
+ DEFINE_ARGS dst, stride, cnt
+ mov cntd, 8
+
+; each iteration writes row n (m0|m1) and row n+8 (m1|m2), then shifts the
+; whole pipeline left by one pixel
+.loop:
+ mova [dstq+strideq*0+ 0], m0
+ mova [dstq+strideq*0+16], m1
+ mova [dstq+strideq*8+ 0], m1
+ mova [dstq+strideq*8+16], m2
+ add dstq, strideq
+%if cpuflag(avx)
+ vpalignr m0, m1, m0, 2
+%else
+ PALIGNR m3, m1, m0, 2, m4
+ mova m0, m3
+%endif
+ SHIFT_RIGHT m1, m1, m4
+ dec cntd
+ jg .loop
+ RET
+
+cglobal vp9_ipred_dl_32x32_16, 2, 5, 7, dst, stride, l, a
+ movifnidn aq, amp
+ mova m0, [aq+mmsize*0] ; abcdefgh
+ mova m1, [aq+mmsize*1] ; ijklmnop
+ mova m2, [aq+mmsize*2] ; qrstuvwx
+ mova m3, [aq+mmsize*3] ; yz012345
+ PALIGNR m4, m1, m0, 2, m6
+ PALIGNR m5, m1, m0, 4, m6
+ LOWPASS 0, 4, 5 ; BCDEFGHI
+ PALIGNR m4, m2, m1, 2, m6
+ PALIGNR m5, m2, m1, 4, m6
+ LOWPASS 1, 4, 5 ; JKLMNOPQ
+ PALIGNR m4, m3, m2, 2, m6
+ PALIGNR m5, m3, m2, 4, m6
+ LOWPASS 2, 4, 5 ; RSTUVWXY
+%if cpuflag(ssse3)
+ mova m6, [pb_2to15_14_15]
+%endif
+ SHIFT_RIGHTx2 m4, m5, m3, m6
+ LOWPASS 3, 4, 5 ; Z0123455
+ pshufd m4, m4, q3333 ; 55555555
+ DEFINE_ARGS dst, stride, stride8, stride24, cnt
+ mov cntd, 8
+ lea stride8q, [strideq*8]
+ lea stride24q, [stride8q*3]
+
+; rows n, n+8, n+16, n+24 share the same register pipeline at different
+; offsets; m4 pads once the diagonal is exhausted
+.loop:
+ mova [dstq+stride8q*0+ 0], m0
+ mova [dstq+stride8q*0+16], m1
+ mova [dstq+stride8q*0+32], m2
+ mova [dstq+stride8q*0+48], m3
+ mova [dstq+stride8q*1+ 0], m1
+ mova [dstq+stride8q*1+16], m2
+ mova [dstq+stride8q*1+32], m3
+ mova [dstq+stride8q*1+48], m4
+ mova [dstq+stride8q*2+ 0], m2
+ mova [dstq+stride8q*2+16], m3
+ mova [dstq+stride8q*2+32], m4
+ mova [dstq+stride8q*2+48], m4
+ mova [dstq+stride24q + 0], m3
+ mova [dstq+stride24q +16], m4
+ mova [dstq+stride24q +32], m4
+ mova [dstq+stride24q +48], m4
+ add dstq, strideq
+%if cpuflag(avx)
+ vpalignr m0, m1, m0, 2
+ vpalignr m1, m2, m1, 2
+ vpalignr m2, m3, m2, 2
+%else
+ PALIGNR m5, m1, m0, 2, m6
+ mova m0, m5
+ PALIGNR m5, m2, m1, 2, m6
+ mova m1, m5
+ PALIGNR m5, m3, m2, 2, m6
+ mova m2, m5
+%endif
+ SHIFT_RIGHT m3, m3, m6
+ dec cntd
+ jg .loop
+ RET
+%endmacro
+
+INIT_XMM sse2
+DL_FUNCS
+INIT_XMM ssse3
+DL_FUNCS
+INIT_XMM avx
+DL_FUNCS
+
+; Down-right prediction, 16bpp, all block sizes. Uses filtered left edge,
+; top-left (#) and above row joined into one diagonal; each row down shifts
+; the diagonal right by one pixel. %1 sizes the x86-32 stack scratch for the
+; 32x32 function. Instantiated below for sse2/ssse3/avx.
+%macro DR_FUNCS 1 ; stack_mem_for_32x32_32bit_function
+cglobal vp9_ipred_dr_4x4_16, 4, 4, 3, dst, stride, l, a
+ movh m0, [lq] ; wxyz....
+ movhps m0, [aq-2] ; wxyz*abc
+ movd m1, [aq+6] ; d.......
+ PALIGNR m1, m0, 2, m2 ; xyz*abcd
+ psrldq m2, m1, 2 ; yz*abcd.
+ LOWPASS 0, 1, 2 ; XYZ#ABC.
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+
+ ; bottom row first; each row up drops one more left pixel
+ movh [dstq+stride3q ], m0
+ psrldq m0, 2 ; YZ#ABC..
+ movh [dstq+strideq*2], m0
+ psrldq m0, 2 ; Z#ABC...
+ movh [dstq+strideq*1], m0
+ psrldq m0, 2 ; #ABC....
+ movh [dstq+strideq*0], m0
+ RET
+
+cglobal vp9_ipred_dr_8x8_16, 4, 4, 5, dst, stride, l, a
+ mova m0, [lq] ; stuvwxyz
+ movu m1, [aq-2] ; *abcdefg
+ mova m2, [aq] ; abcdefgh
+ psrldq m3, m2, 2 ; bcdefgh.
+ LOWPASS 3, 2, 1 ; ABCDEFG.
+ PALIGNR m1, m0, 2, m4 ; tuvwxyz*
+ PALIGNR m2, m1, 2, m4 ; uvwxyz*a
+ LOWPASS 2, 1, 0 ; TUVWXYZ#
+ DEFINE_ARGS dst, stride, dst4, stride3
+ lea stride3q, [strideq*3]
+ lea dst4q, [dstq+strideq*4]
+
+ ; m2|m3 is the 16-pixel diagonal; rows are emitted bottom-up while the
+ ; diagonal shifts right one pixel per row
+ movhps [dstq +stride3q +0], m2
+ movh [dstq+ stride3q +8], m3
+ mova [dst4q+stride3q +0], m2
+ PALIGNR m1, m3, m2, 2, m0
+ psrldq m3, 2
+ movhps [dstq +strideq*2+0], m1
+ movh [dstq+ strideq*2+8], m3
+ mova [dst4q+strideq*2+0], m1
+ PALIGNR m2, m3, m1, 2, m0
+ psrldq m3, 2
+ movhps [dstq +strideq*1+0], m2
+ movh [dstq+ strideq*1+8], m3
+ mova [dst4q+strideq*1+0], m2
+ PALIGNR m1, m3, m2, 2, m0
+ psrldq m3, 2
+ movhps [dstq +strideq*0+0], m1
+ movh [dstq+ strideq*0+8], m3
+ mova [dst4q+strideq*0+0], m1
+ RET
+
+cglobal vp9_ipred_dr_16x16_16, 4, 4, 7, dst, stride, l, a
+ mova m0, [lq] ; klmnopqr
+ mova m1, [lq+mmsize] ; stuvwxyz
+ movu m2, [aq-2] ; *abcdefg
+ movu m3, [aq+mmsize-2] ; hijklmno
+ mova m4, [aq] ; abcdefgh
+ mova m5, [aq+mmsize] ; ijklmnop
+ psrldq m6, m5, 2 ; jklmnop.
+ LOWPASS 6, 5, 3 ; IJKLMNO.
+ PALIGNR m5, m4, 2, m3 ; bcdefghi
+ LOWPASS 5, 4, 2 ; ABCDEFGH
+ PALIGNR m2, m1, 2, m3 ; tuvwxyz*
+ PALIGNR m4, m2, 2, m3 ; uvwxyz*a
+ LOWPASS 4, 2, 1 ; TUVWXYZ#
+ PALIGNR m1, m0, 2, m3 ; lmnopqrs
+ PALIGNR m2, m1, 2, m3 ; mnopqrst
+ LOWPASS 2, 1, 0 ; LMNOPQRS
+ DEFINE_ARGS dst, stride, dst8, cnt
+ lea dst8q, [dstq+strideq*8]
+ mov cntd, 8
+
+; diagonal lives in m2|m4|m5|m6; dst8 walks upward from row 15 while the
+; pipeline shifts right one pixel per iteration
+.loop:
+ sub dst8q, strideq
+ mova [dst8q+strideq*0+ 0], m4
+ mova [dst8q+strideq*0+16], m5
+ mova [dst8q+strideq*8+ 0], m2
+ mova [dst8q+strideq*8+16], m4
+%if cpuflag(avx)
+ vpalignr m2, m4, m2, 2
+ vpalignr m4, m5, m4, 2
+ vpalignr m5, m6, m5, 2
+%else
+ PALIGNR m0, m4, m2, 2, m1
+ mova m2, m0
+ PALIGNR m0, m5, m4, 2, m1
+ mova m4, m0
+ PALIGNR m0, m6, m5, 2, m1
+ mova m5, m0
+%endif
+ psrldq m6, 2
+ dec cntd
+ jg .loop
+ RET
+
+cglobal vp9_ipred_dr_32x32_16, 4, 5, 10 + notcpuflag(ssse3), \
+ %1 * ARCH_X86_32 * -mmsize, dst, stride, l, a
+ ; build the filtered above half of the diagonal, high to low
+ mova m0, [aq+mmsize*3] ; a[24-31]
+ movu m1, [aq+mmsize*3-2] ; a[23-30]
+ psrldq m2, m0, 2 ; a[25-31].
+ LOWPASS 2, 0, 1 ; A[24-30].
+ mova m1, [aq+mmsize*2] ; a[16-23]
+ movu m3, [aq+mmsize*2-2] ; a[15-22]
+ PALIGNR m0, m1, 2, m4 ; a[17-24]
+ LOWPASS 0, 1, 3 ; A[16-23]
+ mova m3, [aq+mmsize*1] ; a[8-15]
+ movu m4, [aq+mmsize*1-2] ; a[7-14]
+ PALIGNR m1, m3, 2, m5 ; a[9-16]
+ LOWPASS 1, 3, 4 ; A[8-15]
+ mova m4, [aq+mmsize*0] ; a[0-7]
+ movu m5, [aq+mmsize*0-2] ; *a[0-6]
+ PALIGNR m3, m4, 2, m6 ; a[1-8]
+ LOWPASS 3, 4, 5 ; A[0-7]
+ SCRATCH 1, 8, rsp+0*mmsize
+ SCRATCH 3, 9, rsp+1*mmsize
+%if notcpuflag(ssse3)
+ SCRATCH 0, 10, rsp+2*mmsize
+%endif
+ ; build the filtered left half of the diagonal, incl. top-left (#)
+ mova m6, [lq+mmsize*3] ; l[24-31]
+ PALIGNR m5, m6, 2, m0 ; l[25-31]*
+ PALIGNR m4, m5, 2, m0 ; l[26-31]*a
+ LOWPASS 4, 5, 6 ; L[25-31]#
+ mova m7, [lq+mmsize*2] ; l[16-23]
+ PALIGNR m6, m7, 2, m0 ; l[17-24]
+ PALIGNR m5, m6, 2, m0 ; l[18-25]
+ LOWPASS 5, 6, 7 ; L[17-24]
+ mova m1, [lq+mmsize*1] ; l[8-15]
+ PALIGNR m7, m1, 2, m0 ; l[9-16]
+ PALIGNR m6, m7, 2, m0 ; l[10-17]
+ LOWPASS 6, 7, 1 ; L[9-16]
+ mova m3, [lq+mmsize*0] ; l[0-7]
+ PALIGNR m1, m3, 2, m0 ; l[1-8]
+ PALIGNR m7, m1, 2, m0 ; l[2-9]
+ LOWPASS 7, 1, 3 ; L[1-8]
+%if cpuflag(ssse3)
+%if cpuflag(avx)
+ UNSCRATCH 1, 8, rsp+0*mmsize
+%endif
+ UNSCRATCH 3, 9, rsp+1*mmsize
+%else
+ UNSCRATCH 0, 10, rsp+2*mmsize
+%endif
+ DEFINE_ARGS dst8, stride, stride8, stride24, cnt
+ lea stride8q, [strideq*8]
+ lea stride24q, [stride8q*3]
+ lea dst8q, [dst8q+strideq*8]
+ mov cntd, 8
+
+; 7-register diagonal pipeline (m7..m0 plus scratch); rows n, n+8, n+16,
+; n+24 are different windows into it, emitted bottom-up
+.loop:
+ sub dst8q, strideq
+%if notcpuflag(avx)
+ UNSCRATCH 1, 8, rsp+0*mmsize
+%if notcpuflag(ssse3)
+ UNSCRATCH 3, 9, rsp+1*mmsize
+%endif
+%endif
+ mova [dst8q+stride8q*0+ 0], m4
+ mova [dst8q+stride8q*0+16], m3
+ mova [dst8q+stride8q*0+32], m1
+ mova [dst8q+stride8q*0+48], m0
+ mova [dst8q+stride8q*1+ 0], m5
+ mova [dst8q+stride8q*1+16], m4
+ mova [dst8q+stride8q*1+32], m3
+ mova [dst8q+stride8q*1+48], m1
+ mova [dst8q+stride8q*2+ 0], m6
+ mova [dst8q+stride8q*2+16], m5
+ mova [dst8q+stride8q*2+32], m4
+ mova [dst8q+stride8q*2+48], m3
+ mova [dst8q+stride24q + 0], m7
+ mova [dst8q+stride24q +16], m6
+ mova [dst8q+stride24q +32], m5
+ mova [dst8q+stride24q +48], m4
+%if cpuflag(avx)
+ vpalignr m7, m6, m7, 2
+ vpalignr m6, m5, m6, 2
+ vpalignr m5, m4, m5, 2
+ vpalignr m4, m3, m4, 2
+ vpalignr m3, m1, m3, 2
+ vpalignr m1, m0, m1, 2
+ vpalignr m0, m2, m0, 2
+%else
+ SCRATCH 2, 8, rsp+0*mmsize
+%if notcpuflag(ssse3)
+ SCRATCH 0, 9, rsp+1*mmsize
+%endif
+ PALIGNR m2, m6, m7, 2, m0
+ mova m7, m2
+ PALIGNR m2, m5, m6, 2, m0
+ mova m6, m2
+ PALIGNR m2, m4, m5, 2, m0
+ mova m5, m2
+ PALIGNR m2, m3, m4, 2, m0
+ mova m4, m2
+ PALIGNR m2, m1, m3, 2, m0
+ mova m3, m2
+%if notcpuflag(ssse3)
+ UNSCRATCH 0, 9, rsp+1*mmsize
+ SCRATCH 3, 9, rsp+1*mmsize
+%endif
+ PALIGNR m2, m0, m1, 2, m3
+ mova m1, m2
+ UNSCRATCH 2, 8, rsp+0*mmsize
+ SCRATCH 1, 8, rsp+0*mmsize
+ PALIGNR m1, m2, m0, 2, m3
+ mova m0, m1
+%endif
+ psrldq m2, 2
+ dec cntd
+ jg .loop
+ RET
+%endmacro
+
+INIT_XMM sse2
+DR_FUNCS 3
+INIT_XMM ssse3
+DR_FUNCS 2
+INIT_XMM avx
+DR_FUNCS 2
+
+; Vertical-left prediction, 16bpp, all block sizes. Even rows come from the
+; averaged above pixels (pavgw), odd rows from the LOWPASS-filtered ones;
+; both sequences shift left one pixel every two rows. %1 sizes the x86-32
+; stack scratch for the 32x32 function. Instantiated for sse2/ssse3/avx.
+%macro VL_FUNCS 1 ; stack_mem_for_32x32_32bit_function
+cglobal vp9_ipred_vl_4x4_16, 2, 4, 3, dst, stride, l, a
+ movifnidn aq, amp
+ movu m0, [aq] ; abcdefgh
+ psrldq m1, m0, 2 ; bcdefgh.
+ psrldq m2, m0, 4 ; cdefgh..
+ LOWPASS 2, 1, 0 ; BCDEFGH.
+ pavgw m1, m0 ; ABCDEFG.
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+
+ movh [dstq+strideq*0], m1
+ movh [dstq+strideq*1], m2
+ psrldq m1, 2
+ psrldq m2, 2
+ movh [dstq+strideq*2], m1
+ movh [dstq+stride3q ], m2
+ RET
+
+cglobal vp9_ipred_vl_8x8_16, 2, 4, 4, dst, stride, l, a
+ movifnidn aq, amp
+ mova m0, [aq] ; abcdefgh
+%if cpuflag(ssse3)
+ mova m3, [pb_2to15_14_15]
+%endif
+ SHIFT_RIGHTx2 m1, m2, m0, m3 ; bcdefghh/cdefghhh
+ LOWPASS 2, 1, 0 ; BCDEFGHh
+ pavgw m1, m0 ; ABCDEFGh
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+
+ ; alternate averaged (m1) and filtered (m2) rows, shifting both by one
+ ; pixel every two rows
+ mova [dstq+strideq*0], m1
+ mova [dstq+strideq*1], m2
+ SHIFT_RIGHT m1, m1, m3
+ SHIFT_RIGHT m2, m2, m3
+ mova [dstq+strideq*2], m1
+ mova [dstq+stride3q ], m2
+ lea dstq, [dstq+strideq*4]
+ SHIFT_RIGHT m1, m1, m3
+ SHIFT_RIGHT m2, m2, m3
+ mova [dstq+strideq*0], m1
+ mova [dstq+strideq*1], m2
+ SHIFT_RIGHT m1, m1, m3
+ SHIFT_RIGHT m2, m2, m3
+ mova [dstq+strideq*2], m1
+ mova [dstq+stride3q ], m2
+ RET
+
+cglobal vp9_ipred_vl_16x16_16, 2, 4, 6, dst, stride, l, a
+ movifnidn aq, amp
+ mova m0, [aq]
+ mova m1, [aq+mmsize]
+ PALIGNR m2, m1, m0, 2, m3
+ PALIGNR m3, m1, m0, 4, m4
+ LOWPASS 3, 2, 0
+ pavgw m2, m0
+%if cpuflag(ssse3)
+ mova m4, [pb_2to15_14_15]
+%endif
+ SHIFT_RIGHTx2 m5, m0, m1, m4
+ LOWPASS 0, 5, 1
+ pavgw m1, m5
+ DEFINE_ARGS dst, stride, cnt
+ mov cntd, 8
+
+; m2|m1 = averaged row, m3|m0 = filtered row; shift both pipelines left one
+; pixel per iteration
+.loop:
+ mova [dstq+strideq*0+ 0], m2
+ mova [dstq+strideq*0+16], m1
+ mova [dstq+strideq*1+ 0], m3
+ mova [dstq+strideq*1+16], m0
+ lea dstq, [dstq+strideq*2]
+%if cpuflag(avx)
+ vpalignr m2, m1, m2, 2
+ vpalignr m3, m0, m3, 2
+%else
+ PALIGNR m5, m1, m2, 2, m4
+ mova m2, m5
+ PALIGNR m5, m0, m3, 2, m4
+ mova m3, m5
+%endif
+ SHIFT_RIGHT m1, m1, m4
+ SHIFT_RIGHT m0, m0, m4
+ dec cntd
+ jg .loop
+ RET
+
+cglobal vp9_ipred_vl_32x32_16, 2, 5, 11, %1 * mmsize * ARCH_X86_32, dst, stride, l, a
+ movifnidn aq, amp
+ mova m0, [aq+mmsize*0]
+ mova m1, [aq+mmsize*1]
+ mova m2, [aq+mmsize*2]
+ PALIGNR m6, m1, m0, 2, m5
+ PALIGNR m7, m1, m0, 4, m5
+ LOWPASS 7, 6, 0
+ pavgw m6, m0
+ SCRATCH 6, 8, rsp+0*mmsize
+ PALIGNR m4, m2, m1, 2, m0
+ PALIGNR m5, m2, m1, 4, m0
+ LOWPASS 5, 4, 1
+ pavgw m4, m1
+ mova m0, [aq+mmsize*3]
+ PALIGNR m1, m0, m2, 2, m6
+ PALIGNR m3, m0, m2, 4, m6
+ LOWPASS 3, 1, 2
+ pavgw m2, m1
+%if cpuflag(ssse3)
+ PRELOAD 10, pb_2to15_14_15, shuf
+%endif
+ SHIFT_RIGHTx2 m6, m1, m0, reg_shuf
+ LOWPASS 1, 6, 0
+ pavgw m0, m6
+%if ARCH_X86_64
+ pshufd m9, m6, q3333
+%endif
+%if cpuflag(avx)
+ UNSCRATCH 6, 8, rsp+0*mmsize
+%endif
+ DEFINE_ARGS dst, stride, cnt, stride16, stride17
+ mov stride16q, strideq
+ mov cntd, 8
+ shl stride16q, 4
+ lea stride17q, [stride16q+strideq]
+
+ ; FIXME m8 is unused for avx, so we could save one register here for win64
+; rows n/n+1 and n+16/n+17 share the averaged/filtered pipelines at a
+; one-register offset; on x86-32 the final column is patched afterwards
+.loop:
+%if notcpuflag(avx)
+ UNSCRATCH 6, 8, rsp+0*mmsize
+%endif
+ mova [dstq+strideq*0+ 0], m6
+ mova [dstq+strideq*0+16], m4
+ mova [dstq+strideq*0+32], m2
+ mova [dstq+strideq*0+48], m0
+ mova [dstq+strideq*1+ 0], m7
+ mova [dstq+strideq*1+16], m5
+ mova [dstq+strideq*1+32], m3
+ mova [dstq+strideq*1+48], m1
+ mova [dstq+stride16q+ 0], m4
+ mova [dstq+stride16q+16], m2
+ mova [dstq+stride16q+32], m0
+%if ARCH_X86_64
+ mova [dstq+stride16q+48], m9
+%endif
+ mova [dstq+stride17q+ 0], m5
+ mova [dstq+stride17q+16], m3
+ mova [dstq+stride17q+32], m1
+%if ARCH_X86_64
+ mova [dstq+stride17q+48], m9
+%endif
+ lea dstq, [dstq+strideq*2]
+%if cpuflag(avx)
+ vpalignr m6, m4, m6, 2
+ vpalignr m4, m2, m4, 2
+ vpalignr m2, m0, m2, 2
+ vpalignr m7, m5, m7, 2
+ vpalignr m5, m3, m5, 2
+ vpalignr m3, m1, m3, 2
+%else
+ SCRATCH 3, 8, rsp+0*mmsize
+%if notcpuflag(ssse3)
+ SCRATCH 1, 10, rsp+1*mmsize
+%endif
+ PALIGNR m3, m4, m6, 2, m1
+ mova m6, m3
+ PALIGNR m3, m2, m4, 2, m1
+ mova m4, m3
+ PALIGNR m3, m0, m2, 2, m1
+ mova m2, m3
+ PALIGNR m3, m5, m7, 2, m1
+ mova m7, m3
+ UNSCRATCH 3, 8, rsp+0*mmsize
+ SCRATCH 6, 8, rsp+0*mmsize
+%if notcpuflag(ssse3)
+ UNSCRATCH 1, 10, rsp+1*mmsize
+ SCRATCH 7, 10, rsp+1*mmsize
+%endif
+ PALIGNR m6, m3, m5, 2, m7
+ mova m5, m6
+ PALIGNR m6, m1, m3, 2, m7
+ mova m3, m6
+%if notcpuflag(ssse3)
+ UNSCRATCH 7, 10, rsp+1*mmsize
+%endif
+%endif
+ SHIFT_RIGHT m1, m1, reg_shuf
+ SHIFT_RIGHT m0, m0, reg_shuf
+ dec cntd
+ jg .loop
+
+%if ARCH_X86_32
+ ; x86-32 has no m9: fill the last 8 columns of the bottom 16 rows with
+ ; the fully-shifted padding vector m0 instead
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+%assign %%n 0
+%rep 4
+ mova [dstq+strideq*0+48], m0
+ mova [dstq+strideq*1+48], m0
+ mova [dstq+strideq*2+48], m0
+ mova [dstq+stride3q +48], m0
+%if %%n < 3
+ lea dstq, [dstq+strideq*4]
+%endif
+%assign %%n (%%n+1)
+%endrep
+%endif
+ RET
+%endmacro
+
+INIT_XMM sse2
+VL_FUNCS 2
+INIT_XMM ssse3
+VL_FUNCS 1
+INIT_XMM avx
+VL_FUNCS 1
+
+%macro VR_FUNCS 0
+cglobal vp9_ipred_vr_4x4_16, 4, 4, 3, dst, stride, l, a
+ movu m0, [aq-2]
+ movhps m1, [lq]
+ PALIGNR m0, m1, 10, m2 ; xyz*abcd
+ pslldq m1, m0, 2 ; .xyz*abc
+ pslldq m2, m0, 4 ; ..xyz*ab
+ LOWPASS 2, 1, 0 ; ..YZ#ABC
+ pavgw m1, m0 ; ....#ABC
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+
+ movhps [dstq+strideq*0], m1
+ movhps [dstq+strideq*1], m2
+ shufps m0, m2, m1, q3210
+%if cpuflag(ssse3)
+ pshufb m2, [pb_4_5_8to13_8x0]
+%else
+ pshuflw m2, m2, q2222
+ psrldq m2, 6
+%endif
+ psrldq m0, 6
+ movh [dstq+strideq*2], m0
+ movh [dstq+stride3q ], m2
+ RET
+
+; 8x8 VR: m0 = averaged top row, m3 = filtered top row, m4 = filtered left
+; column packed for shifting; rows are then emitted two at a time, rotating
+; one pixel (PALIGNR by 14 bytes) per pair.
+cglobal vp9_ipred_vr_8x8_16, 4, 4, 5, dst, stride, l, a
+ movu m1, [aq-2] ; *abcdefg
+ movu m2, [lq] ; stuvwxyz
+ mova m0, [aq] ; abcdefgh
+ PALIGNR m3, m1, m2, 14, m4 ; z*abcdef
+ LOWPASS 3, 1, 0
+ pavgw m0, m1
+ PALIGNR m1, m2, 2, m4 ; tuvwxyz*
+ pslldq m4, m2, 2 ; .stuvwxy
+ LOWPASS 4, 2, 1
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+
+ mova [dstq+strideq*0], m0
+ mova [dstq+strideq*1], m3
+ PALIGNR m0, m4, 14, m1
+ pslldq m4, 2
+ PALIGNR m3, m4, 14, m1
+ pslldq m4, 2
+ mova [dstq+strideq*2], m0
+ mova [dstq+stride3q ], m3
+ lea dstq, [dstq+strideq*4]
+ PALIGNR m0, m4, 14, m1
+ pslldq m4, 2
+ PALIGNR m3, m4, 14, m1
+ pslldq m4, 2
+ mova [dstq+strideq*0], m0
+ mova [dstq+strideq*1], m3
+ PALIGNR m0, m4, 14, m1
+ pslldq m4, 2
+ ; last shift can clobber m4 itself as scratch -- it is dead afterwards
+ PALIGNR m3, m4, 14, m4
+ mova [dstq+strideq*2], m0
+ mova [dstq+stride3q ], m3
+ RET
+
+; 16x16 VR: top rows in m3/m2 (averaged) and m0/m6 (filtered); the filtered
+; left column is split into even/odd word lanes (psrld/pand/packssdw) so each
+; loop iteration can feed one new pixel into the row rotation.
+cglobal vp9_ipred_vr_16x16_16, 4, 4, 8, dst, stride, l, a
+ movu m1, [aq-2] ; *abcdefg
+ movu m2, [aq+mmsize-2] ; hijklmno
+ mova m3, [aq] ; abcdefgh
+ mova m4, [aq+mmsize] ; ijklmnop
+ mova m5, [lq+mmsize] ; stuvwxyz
+ PALIGNR m0, m1, m5, 14, m6 ; z*abcdef
+ movu m6, [aq+mmsize-4] ; ghijklmn
+ LOWPASS 6, 2, 4
+ pavgw m2, m4
+ LOWPASS 0, 1, 3
+ pavgw m3, m1
+ PALIGNR m1, m5, 2, m7 ; tuvwxyz*
+ movu m7, [lq+mmsize-2] ; rstuvwxy
+ LOWPASS 1, 5, 7
+ movu m5, [lq+2] ; lmnopqrs
+ pslldq m4, m5, 2 ; .lmnopqr
+ pslldq m7, m5, 4 ; ..lmnopq
+ LOWPASS 5, 4, 7
+ ; de-interleave the two filtered left vectors into odd (m7) and even (m5)
+ ; word lanes
+ psrld m4, m1, 16
+ psrld m7, m5, 16
+ pand m1, [pd_65535]
+ pand m5, [pd_65535]
+ packssdw m7, m4
+ packssdw m5, m1
+ DEFINE_ARGS dst, stride, cnt
+ mov cntd, 8
+
+.loop:
+ mova [dstq+strideq*0+ 0], m3
+ mova [dstq+strideq*0+16], m2
+ mova [dstq+strideq*1+ 0], m0
+ mova [dstq+strideq*1+16], m6
+ lea dstq, [dstq+strideq*2]
+ ; rotate both row pairs right by one pixel, pulling the next left-edge
+ ; pixel in from m7/m5
+ PALIGNR m2, m3, 14, m4
+ PALIGNR m3, m7, 14, m4
+ pslldq m7, 2
+ PALIGNR m6, m0, 14, m4
+ PALIGNR m0, m5, 14, m4
+ pslldq m5, 2
+ dec cntd
+ jg .loop
+ RET
+
+; 32x32 VR. On x86-64 the second half of the working set lives in m8-m13;
+; on x86-32 those are spilled to 6*mmsize of stack (SCRATCH) and the right
+; half of each row is produced by a second pass (.loop2) over the same data.
+cglobal vp9_ipred_vr_32x32_16, 4, 5, 14, 6 * mmsize * ARCH_X86_32, dst, stride, l, a
+ movu m0, [aq+mmsize*0-2] ; *a[0-6]
+ movu m1, [aq+mmsize*1-2] ; a[7-14]
+ movu m2, [aq+mmsize*2-2] ; a[15-22]
+ movu m3, [aq+mmsize*3-2] ; a[23-30]
+ mova m4, [aq+mmsize*3+0] ; a[24-31]
+ movu m5, [aq+mmsize*3-4] ; a[22-29]
+ LOWPASS 5, 3, 4 ; A[23-30]
+ SCRATCH 5, 8, rsp+0*mmsize
+ pavgw m3, m4
+ mova m4, [aq+mmsize*2+0] ; a[16-23]
+ movu m6, [aq+mmsize*2-4] ; a[14-21]
+ LOWPASS 6, 2, 4 ; A[15-22]
+ SCRATCH 6, 9, rsp+1*mmsize
+ pavgw m2, m4
+ mova m4, [aq+mmsize*1+0] ; a[8-15]
+ movu m7, [aq+mmsize*1-4] ; a[6-13]
+ LOWPASS 7, 1, 4 ; A[7-14]
+ SCRATCH 7, 10, rsp+2*mmsize
+ pavgw m1, m4
+ mova m4, [aq+mmsize*0+0] ; a[0-7]
+ mova m5, [lq+mmsize*3+0] ; l[24-31]
+ PALIGNR m6, m0, m5, 14, m7 ; l[31]*a[0-5]
+ LOWPASS 6, 0, 4 ; #A[0-6]
+ SCRATCH 6, 11, rsp+3*mmsize
+ pavgw m4, m0
+ PALIGNR m0, m5, 2, m7 ; l[25-31]*
+ movu m7, [lq+mmsize*3-2] ; l[23-30]
+ LOWPASS 0, 5, 7 ; L[24-31]
+ movu m5, [lq+mmsize*2-2] ; l[15-22]
+ mova m7, [lq+mmsize*2+0] ; l[16-23]
+ movu m6, [lq+mmsize*2+2] ; l[17-24]
+ LOWPASS 5, 7, 6 ; L[16-23]
+ ; split filtered left pixels into odd/even word lanes for the rotation feed
+ psrld m7, m0, 16
+ psrld m6, m5, 16
+ pand m0, [pd_65535]
+ pand m5, [pd_65535]
+ packssdw m6, m7
+ packssdw m5, m0
+ SCRATCH 5, 12, rsp+4*mmsize
+ SCRATCH 6, 13, rsp+5*mmsize
+ movu m6, [lq+mmsize*1-2] ; l[7-14]
+ mova m0, [lq+mmsize*1+0] ; l[8-15]
+ movu m5, [lq+mmsize*1+2] ; l[9-16]
+ LOWPASS 6, 0, 5 ; L[8-15]
+ movu m0, [lq+mmsize*0+2] ; l[1-8]
+ pslldq m5, m0, 2 ; .l[1-7]
+ pslldq m7, m0, 4 ; ..l[1-6]
+ LOWPASS 0, 5, 7
+ psrld m5, m6, 16
+ psrld m7, m0, 16
+ pand m6, [pd_65535]
+ pand m0, [pd_65535]
+ packssdw m7, m5
+ packssdw m0, m6
+ UNSCRATCH 6, 13, rsp+5*mmsize
+ DEFINE_ARGS dst, stride, stride16, cnt, stride17
+ mov stride16q, strideq
+ mov cntd, 8
+ shl stride16q, 4
+%if ARCH_X86_64
+ lea stride17q, [stride16q+strideq]
+%endif
+
+; main loop: writes rows n, n+1, n+16 (and n+17 on x86-64) per iteration,
+; then rotates every row vector right by one pixel
+.loop:
+ mova [dstq+strideq*0+ 0], m4
+ mova [dstq+strideq*0+16], m1
+ mova [dstq+strideq*0+32], m2
+ mova [dstq+strideq*0+48], m3
+%if ARCH_X86_64
+ mova [dstq+strideq*1+ 0], m11
+ mova [dstq+strideq*1+16], m10
+ mova [dstq+strideq*1+32], m9
+ mova [dstq+strideq*1+48], m8
+%endif
+ mova [dstq+stride16q+ 0], m6
+ mova [dstq+stride16q+16], m4
+ mova [dstq+stride16q+32], m1
+ mova [dstq+stride16q+48], m2
+%if ARCH_X86_64
+ mova [dstq+stride17q+ 0], m12
+ mova [dstq+stride17q+16], m11
+ mova [dstq+stride17q+32], m10
+ mova [dstq+stride17q+48], m9
+%endif
+ lea dstq, [dstq+strideq*2]
+ PALIGNR m3, m2, 14, m5
+ PALIGNR m2, m1, 14, m5
+ PALIGNR m1, m4, 14, m5
+ PALIGNR m4, m6, 14, m5
+ PALIGNR m6, m7, 14, m5
+ pslldq m7, 2
+%if ARCH_X86_64
+ PALIGNR m8, m9, 14, m5
+ PALIGNR m9, m10, 14, m5
+ PALIGNR m10, m11, 14, m5
+ PALIGNR m11, m12, 14, m5
+ PALIGNR m12, m0, 14, m5
+ pslldq m0, 2
+%endif
+ dec cntd
+ jg .loop
+
+%if ARCH_X86_32
+ ; second pass for the rows that x86-64 handled with m8-m12 above
+ UNSCRATCH 5, 12, rsp+4*mmsize
+ UNSCRATCH 4, 11, rsp+3*mmsize
+ UNSCRATCH 3, 10, rsp+2*mmsize
+ UNSCRATCH 2, 9, rsp+1*mmsize
+ UNSCRATCH 1, 8, rsp+0*mmsize
+ mov dstq, dstm
+ mov cntd, 8
+ add dstq, strideq
+.loop2:
+ mova [dstq+strideq*0+ 0], m4
+ mova [dstq+strideq*0+16], m3
+ mova [dstq+strideq*0+32], m2
+ mova [dstq+strideq*0+48], m1
+ mova [dstq+stride16q+ 0], m5
+ mova [dstq+stride16q+16], m4
+ mova [dstq+stride16q+32], m3
+ mova [dstq+stride16q+48], m2
+ lea dstq, [dstq+strideq*2]
+ PALIGNR m1, m2, 14, m6
+ PALIGNR m2, m3, 14, m6
+ PALIGNR m3, m4, 14, m6
+ PALIGNR m4, m5, 14, m6
+ PALIGNR m5, m0, 14, m6
+ pslldq m0, 2
+ dec cntd
+ jg .loop2
+%endif
+ RET
+%endmacro
+
+; Instantiate the VR predictors for sse2/ssse3/avx.
+INIT_XMM sse2
+VR_FUNCS
+INIT_XMM ssse3
+VR_FUNCS
+INIT_XMM avx
+VR_FUNCS
+
+; Horizontal-up (HU) intra predictors, 16-bit pixels. These use only the
+; left edge (note: 3 register args, "a" is unused); the macro argument sizes
+; the x86-32 stack scratch needed by the 32x32 variant.
+%macro HU_FUNCS 1 ; stack_mem_for_32x32_32bit_function
+cglobal vp9_ipred_hu_4x4_16, 3, 3, 3, dst, stride, l, a
+ movh m0, [lq] ; abcd
+%if cpuflag(ssse3)
+ pshufb m0, [pb_0to7_67x4] ; abcddddd
+%else
+ ; sse2 fallback: replicate the last pixel across the upper half
+ punpcklqdq m0, m0
+ pshufhw m0, m0, q3333 ; abcddddd
+%endif
+ psrldq m1, m0, 2 ; bcddddd.
+ psrldq m2, m0, 4 ; cddddd..
+ LOWPASS 2, 1, 0 ; BCDddd..
+ pavgw m1, m0 ; abcddddd
+ SBUTTERFLY wd, 1, 2, 0 ; aBbCcDdd, dddddddd
+ PALIGNR m2, m1, 4, m0 ; bCcDdddd
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+
+ movh [dstq+strideq*0], m1 ; aBbC
+ movh [dstq+strideq*1], m2 ; bCcD
+ movhps [dstq+strideq*2], m1 ; cDdd
+ movhps [dstq+stride3q ], m2 ; dddd
+ RET
+
+; 8x8 HU: interleave averaged (m1) and filtered (m2) left pixels, then emit
+; rows by sliding a 4-byte window down the interleaved sequence.
+cglobal vp9_ipred_hu_8x8_16, 3, 3, 4, dst, stride, l, a
+ mova m0, [lq]
+%if cpuflag(ssse3)
+ mova m3, [pb_2to15_14_15]
+%endif
+ SHIFT_RIGHTx2 m1, m2, m0, m3
+ LOWPASS 2, 1, 0
+ pavgw m1, m0
+ SBUTTERFLY wd, 1, 2, 0
+ shufps m0, m1, m2, q1032
+ pshufd m3, m2, q3332
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+
+ ; rows 0-3 at even strides, then advance one line and repeat shifted data
+ mova [dstq+strideq *0], m1
+ mova [dstq+strideq *2], m0
+ mova [dstq+strideq *4], m2
+ mova [dstq+stride3q*2], m3
+ add dstq, strideq
+%if cpuflag(avx)
+ vpalignr m1, m2, m1, 4
+%else
+ PALIGNR m0, m2, m1, 4, m3
+ mova m1, m0
+%endif
+ pshufd m2, m2, q3321
+ shufps m0, m1, m2, q1032
+ pshufd m3, m2, q3332
+ mova [dstq+strideq *0], m1
+ mova [dstq+strideq *2], m0
+ mova [dstq+strideq *4], m2
+ mova [dstq+stride3q*2], m3
+ RET
+
+; 16x16 HU: m1..m4 hold the interleaved averaged/filtered sequence, m0 the
+; replicated final pixel; each loop iteration shifts the whole chain left by
+; one interleaved pair (4 bytes).
+cglobal vp9_ipred_hu_16x16_16, 3, 4, 6 + notcpuflag(ssse3), dst, stride, l, a
+ mova m0, [lq]
+ mova m3, [lq+mmsize]
+ movu m1, [lq+2]
+ movu m2, [lq+4]
+ LOWPASS 2, 1, 0
+ pavgw m1, m0
+ SBUTTERFLY wd, 1, 2, 0
+%if cpuflag(ssse3)
+ mova m5, [pb_2to15_14_15]
+%endif
+ SHIFT_RIGHTx2 m0, m4, m3, m5
+ LOWPASS 4, 0, 3
+ pavgw m3, m0
+ SBUTTERFLY wd, 3, 4, 5
+ pshufd m0, m0, q3333
+ DEFINE_ARGS dst, stride, stride3, cnt
+ lea stride3q, [strideq*3]
+ mov cntd, 4
+
+.loop:
+ mova [dstq+strideq *0+ 0], m1
+ mova [dstq+strideq *0+16], m2
+ mova [dstq+strideq *4+ 0], m2
+ mova [dstq+strideq *4+16], m3
+ mova [dstq+strideq *8+ 0], m3
+ mova [dstq+strideq *8+16], m4
+ mova [dstq+stride3q*4+ 0], m4
+ mova [dstq+stride3q*4+16], m0
+ add dstq, strideq
+%if cpuflag(avx)
+ vpalignr m1, m2, m1, 4
+ vpalignr m2, m3, m2, 4
+ vpalignr m3, m4, m3, 4
+ vpalignr m4, m0, m4, 4
+%else
+ ; pre-avx PALIGNR needs a distinct destination, hence the mova shuffle
+ PALIGNR m5, m2, m1, 4, m6
+ mova m1, m5
+ PALIGNR m5, m3, m2, 4, m6
+ mova m2, m5
+ PALIGNR m5, m4, m3, 4, m6
+ mova m3, m5
+ PALIGNR m5, m0, m4, 4, m6
+ mova m4, m5
+%endif
+ dec cntd
+ jg .loop
+ RET
+
+; 32x32 HU. Eight interleaved vectors cover the 64-pixel sequence; registers
+; m8+ (x86-64) / stack slots (x86-32, sized by the macro argument) hold what
+; does not fit, and m1 is double-buffered across the two loop halves.
+cglobal vp9_ipred_hu_32x32_16, 3, 7, 10 + notcpuflag(ssse3), \
+ %1 * -mmsize * ARCH_X86_32, dst, stride, l, a
+ mova m2, [lq+mmsize*0+0]
+ movu m1, [lq+mmsize*0+2]
+ movu m0, [lq+mmsize*0+4]
+ LOWPASS 0, 1, 2
+ pavgw m1, m2
+ SBUTTERFLY wd, 1, 0, 2
+ SCRATCH 1, 8, rsp+0*mmsize
+ mova m4, [lq+mmsize*1+0]
+ movu m3, [lq+mmsize*1+2]
+ movu m2, [lq+mmsize*1+4]
+ LOWPASS 2, 3, 4
+ pavgw m3, m4
+ SBUTTERFLY wd, 3, 2, 4
+ mova m6, [lq+mmsize*2+0]
+ movu m5, [lq+mmsize*2+2]
+ movu m4, [lq+mmsize*2+4]
+ LOWPASS 4, 5, 6
+ pavgw m5, m6
+ SBUTTERFLY wd, 5, 4, 6
+ mova m7, [lq+mmsize*3+0]
+ SCRATCH 0, 9, rsp+1*mmsize
+%if cpuflag(ssse3)
+ mova m0, [pb_2to15_14_15]
+%endif
+ SHIFT_RIGHTx2 m1, m6, m7, m0
+ LOWPASS 6, 1, 7
+ pavgw m7, m1
+ SBUTTERFLY wd, 7, 6, 0
+ pshufd m1, m1, q3333
+ UNSCRATCH 0, 9, rsp+1*mmsize
+ DEFINE_ARGS dst, stride, cnt, stride3, stride4, stride20, stride28
+ lea stride3q, [strideq*3]
+ lea stride4q, [strideq*4]
+ lea stride28q, [stride4q*8]
+ lea stride20q, [stride4q*5]
+ sub stride28q, stride4q
+ mov cntd, 4
+
+; each iteration writes 8 rows (at 0,4,8,...,28 lines from dstq) and then
+; shifts all vectors by one interleaved pair
+.loop:
+%if ARCH_X86_64
+ SWAP 1, 8
+%else
+ ; swap the two m1 buffers via the stack
+ mova [rsp+1*mmsize], m1
+ mova m1, [rsp+0*mmsize]
+%endif
+ mova [dstq+strideq *0+ 0], m1
+ mova [dstq+strideq *0+16], m0
+ mova [dstq+strideq *0+32], m3
+ mova [dstq+strideq *0+48], m2
+ mova [dstq+stride4q*1+ 0], m0
+ mova [dstq+stride4q*1+16], m3
+ mova [dstq+stride4q*1+32], m2
+ mova [dstq+stride4q*1+48], m5
+ mova [dstq+stride4q*2+ 0], m3
+ mova [dstq+stride4q*2+16], m2
+ mova [dstq+stride4q*2+32], m5
+ mova [dstq+stride4q*2+48], m4
+%if cpuflag(avx)
+ vpalignr m1, m0, m1, 4
+ vpalignr m0, m3, m0, 4
+ vpalignr m3, m2, m3, 4
+%else
+ SCRATCH 6, 9, rsp+2*mmsize
+%if notcpuflag(ssse3)
+ SCRATCH 7, 10, rsp+3*mmsize
+%endif
+ PALIGNR m6, m0, m1, 4, m7
+ mova m1, m6
+ PALIGNR m6, m3, m0, 4, m7
+ mova m0, m6
+ PALIGNR m6, m2, m3, 4, m7
+ mova m3, m6
+ UNSCRATCH 6, 9, rsp+2*mmsize
+ SCRATCH 0, 9, rsp+2*mmsize
+%if notcpuflag(ssse3)
+ UNSCRATCH 7, 10, rsp+3*mmsize
+ SCRATCH 3, 10, rsp+3*mmsize
+%endif
+%endif
+%if ARCH_X86_64
+ SWAP 1, 8
+%else
+ mova [rsp+0*mmsize], m1
+ mova m1, [rsp+1*mmsize]
+%endif
+ mova [dstq+stride3q*4+ 0], m2
+ mova [dstq+stride3q*4+16], m5
+ mova [dstq+stride3q*4+32], m4
+ mova [dstq+stride3q*4+48], m7
+ mova [dstq+stride4q*4+ 0], m5
+ mova [dstq+stride4q*4+16], m4
+ mova [dstq+stride4q*4+32], m7
+ mova [dstq+stride4q*4+48], m6
+ mova [dstq+stride20q + 0], m4
+ mova [dstq+stride20q +16], m7
+ mova [dstq+stride20q +32], m6
+ mova [dstq+stride20q +48], m1
+ mova [dstq+stride3q*8+ 0], m7
+ mova [dstq+stride3q*8+16], m6
+ mova [dstq+stride3q*8+32], m1
+ mova [dstq+stride3q*8+48], m1
+ mova [dstq+stride28q + 0], m6
+ mova [dstq+stride28q +16], m1
+ mova [dstq+stride28q +32], m1
+ mova [dstq+stride28q +48], m1
+%if cpuflag(avx)
+ vpalignr m2, m5, m2, 4
+ vpalignr m5, m4, m5, 4
+ vpalignr m4, m7, m4, 4
+ vpalignr m7, m6, m7, 4
+ vpalignr m6, m1, m6, 4
+%else
+ PALIGNR m0, m5, m2, 4, m3
+ mova m2, m0
+ PALIGNR m0, m4, m5, 4, m3
+ mova m5, m0
+ PALIGNR m0, m7, m4, 4, m3
+ mova m4, m0
+ PALIGNR m0, m6, m7, 4, m3
+ mova m7, m0
+ PALIGNR m0, m1, m6, 4, m3
+ mova m6, m0
+ UNSCRATCH 0, 9, rsp+2*mmsize
+%if notcpuflag(ssse3)
+ UNSCRATCH 3, 10, rsp+3*mmsize
+%endif
+%endif
+ add dstq, strideq
+ dec cntd
+ jg .loop
+ RET
+%endmacro
+
+; Instantiate HU predictors; argument = stack scratch (in mmsize units) the
+; 32x32 function needs on x86-32 for that ISA level.
+INIT_XMM sse2
+HU_FUNCS 4
+INIT_XMM ssse3
+HU_FUNCS 3
+INIT_XMM avx
+HU_FUNCS 2
+
+; Horizontal-down (HD) intra predictors, 16-bit pixels. Uses both edges;
+; output rows walk the interleaved averaged/filtered sequence bottom-up.
+%macro HD_FUNCS 0
+cglobal vp9_ipred_hd_4x4_16, 4, 4, 4, dst, stride, l, a
+ movh m0, [lq]
+ movhps m0, [aq-2]
+ psrldq m1, m0, 2
+ psrldq m2, m0, 4
+ LOWPASS 2, 1, 0
+ pavgw m1, m0
+ punpcklwd m1, m2
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+
+ ; bottom two rows straight from m1; top two rows are m1 advanced by one
+ ; interleaved pair merged with the filtered top pixels
+ movh [dstq+stride3q ], m1
+ movhps [dstq+strideq*1], m1
+ movhlps m2, m2
+ PALIGNR m2, m1, 4, m0
+ movh [dstq+strideq*2], m2
+ movhps [dstq+strideq*0], m2
+ RET
+
+; 8x8 HD: m2/m3 = interleaved left-edge pairs, m1 = filtered top row; rows
+; are written bottom-to-top (negative stride trick) while the window slides.
+cglobal vp9_ipred_hd_8x8_16, 4, 4, 5, dst, stride, l, a
+ mova m0, [lq]
+ movu m1, [aq-2]
+ PALIGNR m2, m1, m0, 2, m3
+ PALIGNR m3, m1, m0, 4, m4
+ LOWPASS 3, 2, 0
+ pavgw m2, m0
+ SBUTTERFLY wd, 2, 3, 0
+ psrldq m0, m1, 2
+ psrldq m4, m1, 4
+ LOWPASS 1, 0, 4
+ DEFINE_ARGS dst8, mstride, cnt
+ ; point at the last row and walk upwards with a negated stride
+ lea dst8q, [dst8q+mstrideq*8]
+ neg mstrideq
+ mov cntd, 4
+
+.loop:
+ add dst8q, mstrideq
+ mova [dst8q+mstrideq*0], m2
+ mova [dst8q+mstrideq*4], m3
+%if cpuflag(avx)
+ vpalignr m2, m3, m2, 4
+ vpalignr m3, m1, m3, 4
+%else
+ PALIGNR m0, m3, m2, 4, m4
+ mova m2, m0
+ PALIGNR m0, m1, m3, 4, m4
+ mova m3, m0
+%endif
+ psrldq m1, 4
+ dec cntd
+ jg .loop
+ RET
+
+; 16x16 HD: m1/m0/m3/m2 = interleaved left-edge chain, m4/m5 = filtered top
+; rows; written bottom-to-top, shifting one pair per iteration.
+cglobal vp9_ipred_hd_16x16_16, 4, 4, 8, dst, stride, l, a
+ mova m2, [lq]
+ movu m1, [lq+2]
+ movu m0, [lq+4]
+ LOWPASS 0, 1, 2
+ pavgw m1, m2
+ mova m4, [lq+mmsize]
+ movu m5, [aq-2]
+ PALIGNR m3, m5, m4, 2, m6
+ PALIGNR m2, m5, m4, 4, m6
+ LOWPASS 2, 3, 4
+ pavgw m3, m4
+ SBUTTERFLY wd, 1, 0, 4
+ SBUTTERFLY wd, 3, 2, 4
+ mova m6, [aq]
+ movu m4, [aq+2]
+ LOWPASS 4, 6, 5
+ movu m5, [aq+mmsize-2]
+ psrldq m6, m5, 2
+ psrldq m7, m5, 4
+ LOWPASS 5, 6, 7
+ DEFINE_ARGS dst, mstride, mstride3, cnt
+ ; start at the bottom row; mstride is negated so additions move upwards
+ lea dstq, [dstq+mstrideq*8]
+ lea dstq, [dstq+mstrideq*8]
+ neg mstrideq
+ lea mstride3q, [mstrideq*3]
+ mov cntd, 4
+
+.loop:
+ add dstq, mstrideq
+ mova [dstq+mstride3q*4+ 0], m2
+ mova [dstq+mstride3q*4+16], m4
+ mova [dstq+mstrideq *8+ 0], m3
+ mova [dstq+mstrideq *8+16], m2
+ mova [dstq+mstrideq *4+ 0], m0
+ mova [dstq+mstrideq *4+16], m3
+ mova [dstq+mstrideq *0+ 0], m1
+ mova [dstq+mstrideq *0+16], m0
+%if cpuflag(avx)
+ vpalignr m1, m0, m1, 4
+ vpalignr m0, m3, m0, 4
+ vpalignr m3, m2, m3, 4
+ vpalignr m2, m4, m2, 4
+ vpalignr m4, m5, m4, 4
+%else
+ PALIGNR m6, m0, m1, 4, m7
+ mova m1, m6
+ PALIGNR m6, m3, m0, 4, m7
+ mova m0, m6
+ PALIGNR m6, m2, m3, 4, m7
+ mova m3, m6
+ PALIGNR m6, m4, m2, 4, m7
+ mova m2, m6
+ PALIGNR m6, m5, m4, 4, m7
+ mova m4, m6
+%endif
+ psrldq m5, 4
+ dec cntd
+ jg .loop
+ RET
+
+; 32x32 HD. Needs 14 vectors: the interleaved left-edge chain plus four
+; filtered top rows. x86-64 keeps everything in m0-m13; x86-32 spills to
+; 10*mmsize of stack and renders the upper half in a second loop (.loop_2).
+cglobal vp9_ipred_hd_32x32_16, 4, 4 + 3 * ARCH_X86_64, 14, \
+ 10 * -mmsize * ARCH_X86_32, dst, stride, l, a
+ mova m2, [lq+mmsize*0+0]
+ movu m1, [lq+mmsize*0+2]
+ movu m0, [lq+mmsize*0+4]
+ LOWPASS 0, 1, 2
+ pavgw m1, m2
+ SBUTTERFLY wd, 1, 0, 2
+ mova m4, [lq+mmsize*1+0]
+ movu m3, [lq+mmsize*1+2]
+ movu m2, [lq+mmsize*1+4]
+ LOWPASS 2, 3, 4
+ pavgw m3, m4
+ SBUTTERFLY wd, 3, 2, 4
+ SCRATCH 0, 8, rsp+0*mmsize
+ SCRATCH 1, 9, rsp+1*mmsize
+ SCRATCH 2, 10, rsp+2*mmsize
+ SCRATCH 3, 11, rsp+3*mmsize
+ mova m6, [lq+mmsize*2+0]
+ movu m5, [lq+mmsize*2+2]
+ movu m4, [lq+mmsize*2+4]
+ LOWPASS 4, 5, 6
+ pavgw m5, m6
+ SBUTTERFLY wd, 5, 4, 6
+ mova m0, [lq+mmsize*3+0]
+ movu m1, [aq+mmsize*0-2]
+ PALIGNR m7, m1, m0, 2, m2
+ PALIGNR m6, m1, m0, 4, m2
+ LOWPASS 6, 7, 0
+ pavgw m7, m0
+ SBUTTERFLY wd, 7, 6, 0
+ mova m2, [aq+mmsize*0+0]
+ movu m0, [aq+mmsize*0+2]
+ LOWPASS 0, 2, 1
+ movu m1, [aq+mmsize*1-2]
+ mova m2, [aq+mmsize*1+0]
+ movu m3, [aq+mmsize*1+2]
+ LOWPASS 1, 2, 3
+ SCRATCH 6, 12, rsp+6*mmsize
+ SCRATCH 7, 13, rsp+7*mmsize
+ movu m2, [aq+mmsize*2-2]
+ mova m3, [aq+mmsize*2+0]
+ movu m6, [aq+mmsize*2+2]
+ LOWPASS 2, 3, 6
+ movu m3, [aq+mmsize*3-2]
+ psrldq m6, m3, 2
+ psrldq m7, m3, 4
+ LOWPASS 3, 6, 7
+ UNSCRATCH 6, 12, rsp+6*mmsize
+ UNSCRATCH 7, 13, rsp+7*mmsize
+%if ARCH_X86_32
+ mova [rsp+4*mmsize], m4
+ mova [rsp+5*mmsize], m5
+ ; we already backed up m6/m7 earlier on x86-32 in SCRATCH, so we don't need
+ ; to do it again here
+%endif
+ DEFINE_ARGS dst, stride, cnt, stride3, stride4, stride20, stride28
+ mov cntd, 4
+ lea stride3q, [strideq*3]
+%if ARCH_X86_64
+ lea stride4q, [strideq*4]
+ lea stride28q, [stride4q*8]
+ lea stride20q, [stride4q*5]
+ sub stride28q, stride4q
+%endif
+ add dstq, stride3q
+
+ ; x86-32 doesn't have enough registers, so on that platform, we split
+ ; the loop in 2... Otherwise you spend most of the loop (un)scratching
+.loop:
+%if ARCH_X86_64
+ mova [dstq+stride28q + 0], m9
+ mova [dstq+stride28q +16], m8
+ mova [dstq+stride28q +32], m11
+ mova [dstq+stride28q +48], m10
+ mova [dstq+stride3q*8+ 0], m8
+ mova [dstq+stride3q*8+16], m11
+ mova [dstq+stride3q*8+32], m10
+ mova [dstq+stride3q*8+48], m5
+ mova [dstq+stride20q + 0], m11
+ mova [dstq+stride20q +16], m10
+ mova [dstq+stride20q +32], m5
+ mova [dstq+stride20q +48], m4
+ mova [dstq+stride4q*4+ 0], m10
+ mova [dstq+stride4q*4+16], m5
+ mova [dstq+stride4q*4+32], m4
+ mova [dstq+stride4q*4+48], m7
+%endif
+ mova [dstq+stride3q*4+ 0], m5
+ mova [dstq+stride3q*4+16], m4
+ mova [dstq+stride3q*4+32], m7
+ mova [dstq+stride3q*4+48], m6
+ mova [dstq+strideq* 8+ 0], m4
+ mova [dstq+strideq* 8+16], m7
+ mova [dstq+strideq* 8+32], m6
+ mova [dstq+strideq* 8+48], m0
+ mova [dstq+strideq* 4+ 0], m7
+ mova [dstq+strideq* 4+16], m6
+ mova [dstq+strideq* 4+32], m0
+ mova [dstq+strideq* 4+48], m1
+ mova [dstq+strideq* 0+ 0], m6
+ mova [dstq+strideq* 0+16], m0
+ mova [dstq+strideq* 0+32], m1
+ mova [dstq+strideq* 0+48], m2
+ sub dstq, strideq
+%if cpuflag(avx)
+%if ARCH_X86_64
+ vpalignr m9, m8, m9, 4
+ vpalignr m8, m11, m8, 4
+ vpalignr m11, m10, m11, 4
+ vpalignr m10, m5, m10, 4
+%endif
+ vpalignr m5, m4, m5, 4
+ vpalignr m4, m7, m4, 4
+ vpalignr m7, m6, m7, 4
+ vpalignr m6, m0, m6, 4
+ vpalignr m0, m1, m0, 4
+ vpalignr m1, m2, m1, 4
+ vpalignr m2, m3, m2, 4
+%else
+%if ARCH_X86_64
+ PALIGNR m12, m8, m9, 4, m13
+ mova m9, m12
+ PALIGNR m12, m11, m8, 4, m13
+ mova m8, m12
+ PALIGNR m12, m10, m11, 4, m13
+ mova m11, m12
+ PALIGNR m12, m5, m10, 4, m13
+ mova m10, m12
+%endif
+ SCRATCH 3, 12, rsp+8*mmsize, sh
+%if notcpuflag(ssse3)
+ SCRATCH 2, 13, rsp+9*mmsize
+%endif
+ PALIGNR m3, m4, m5, 4, m2
+ mova m5, m3
+ PALIGNR m3, m7, m4, 4, m2
+ mova m4, m3
+ PALIGNR m3, m6, m7, 4, m2
+ mova m7, m3
+ PALIGNR m3, m0, m6, 4, m2
+ mova m6, m3
+ PALIGNR m3, m1, m0, 4, m2
+ mova m0, m3
+%if notcpuflag(ssse3)
+ UNSCRATCH 2, 13, rsp+9*mmsize
+ SCRATCH 0, 13, rsp+9*mmsize
+%endif
+ PALIGNR m3, m2, m1, 4, m0
+ mova m1, m3
+ PALIGNR m3, reg_sh, m2, 4, m0
+ mova m2, m3
+%if notcpuflag(ssse3)
+ UNSCRATCH 0, 13, rsp+9*mmsize
+%endif
+ UNSCRATCH 3, 12, rsp+8*mmsize, sh
+%endif
+ psrldq m3, 4
+ dec cntd
+ jg .loop
+
+%if ARCH_X86_32
+ ; second pass: redo the rows that x86-64 wrote from m8-m11 above
+ UNSCRATCH 0, 8, rsp+0*mmsize
+ UNSCRATCH 1, 9, rsp+1*mmsize
+ UNSCRATCH 2, 10, rsp+2*mmsize
+ UNSCRATCH 3, 11, rsp+3*mmsize
+ mova m4, [rsp+4*mmsize]
+ mova m5, [rsp+5*mmsize]
+ mova m6, [rsp+6*mmsize]
+ mova m7, [rsp+7*mmsize]
+ DEFINE_ARGS dst, stride, stride5, stride3
+ lea stride5q, [strideq*5]
+ lea dstq, [dstq+stride5q*4]
+ DEFINE_ARGS dst, stride, cnt, stride3
+ mov cntd, 4
+.loop_2:
+ mova [dstq+stride3q*4+ 0], m1
+ mova [dstq+stride3q*4+16], m0
+ mova [dstq+stride3q*4+32], m3
+ mova [dstq+stride3q*4+48], m2
+ mova [dstq+strideq* 8+ 0], m0
+ mova [dstq+strideq* 8+16], m3
+ mova [dstq+strideq* 8+32], m2
+ mova [dstq+strideq* 8+48], m5
+ mova [dstq+strideq* 4+ 0], m3
+ mova [dstq+strideq* 4+16], m2
+ mova [dstq+strideq* 4+32], m5
+ mova [dstq+strideq* 4+48], m4
+ mova [dstq+strideq* 0+ 0], m2
+ mova [dstq+strideq* 0+16], m5
+ mova [dstq+strideq* 0+32], m4
+ mova [dstq+strideq* 0+48], m7
+ sub dstq, strideq
+%if cpuflag(avx)
+ vpalignr m1, m0, m1, 4
+ vpalignr m0, m3, m0, 4
+ vpalignr m3, m2, m3, 4
+ vpalignr m2, m5, m2, 4
+ vpalignr m5, m4, m5, 4
+ vpalignr m4, m7, m4, 4
+ vpalignr m7, m6, m7, 4
+%else
+ SCRATCH 6, 12, rsp+8*mmsize, sh
+%if notcpuflag(ssse3)
+ SCRATCH 7, 13, rsp+9*mmsize
+%endif
+ PALIGNR m6, m0, m1, 4, m7
+ mova m1, m6
+ PALIGNR m6, m3, m0, 4, m7
+ mova m0, m6
+ PALIGNR m6, m2, m3, 4, m7
+ mova m3, m6
+ PALIGNR m6, m5, m2, 4, m7
+ mova m2, m6
+ PALIGNR m6, m4, m5, 4, m7
+ mova m5, m6
+%if notcpuflag(ssse3)
+ UNSCRATCH 7, 13, rsp+9*mmsize
+ SCRATCH 5, 13, rsp+9*mmsize
+%endif
+ PALIGNR m6, m7, m4, 4, m5
+ mova m4, m6
+ PALIGNR m6, reg_sh, m7, 4, m5
+ mova m7, m6
+%if notcpuflag(ssse3)
+ UNSCRATCH 5, 13, rsp+9*mmsize
+%endif
+ UNSCRATCH 6, 12, rsp+8*mmsize, sh
+%endif
+ psrldq m6, 4
+ dec cntd
+ jg .loop_2
+%endif
+ RET
+%endmacro
+
+; Instantiate the HD predictors for sse2/ssse3/avx.
+INIT_XMM sse2
+HD_FUNCS
+INIT_XMM ssse3
+HD_FUNCS
+INIT_XMM avx
+HD_FUNCS
diff --git a/media/ffvpx/libavcodec/x86/vp9itxfm.asm b/media/ffvpx/libavcodec/x86/vp9itxfm.asm
new file mode 100644
index 000000000..6d5008e33
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp9itxfm.asm
@@ -0,0 +1,2625 @@
+;******************************************************************************
+;* VP9 IDCT SIMD optimizations
+;*
+;* Copyright (C) 2013 Clément Bœsch <u pkh me>
+;* Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+%include "vp9itxfm_template.asm"
+
+SECTION_RODATA
+
+; Emit the coefficient tables for one (%1, %2) cos/sin pair: interleaved
+; word pairs (pw_m%1_%2, pw_%2_%1) for pmaddwd butterflies, plus the doubled
+; constants (pw_*x2) for pmulhrsw. An optional third argument of 1 also
+; emits the negated/swapped pair variants.
+%macro VP9_IDCT_COEFFS 2-3 0
+const pw_m%1_%2
+times 4 dw -%1, %2
+const pw_%2_%1
+times 4 dw %2, %1
+
+%if %3 == 1
+const pw_m%2_m%1
+times 4 dw -%2, -%1
+%if %1 != %2
+const pw_m%2_%1
+times 4 dw -%2, %1
+const pw_%1_%2
+times 4 dw %1, %2
+%endif
+%endif
+
+; only one of pw_m%1x2 / pw_%1x2 is needed per constant; 11585 (the middle
+; value) gets the exported "const" form
+%if %1 < 11585
+pw_m%1x2: times 8 dw -%1*2
+%elif %1 > 11585
+pw_%1x2: times 8 dw %1*2
+%else
+const pw_%1x2
+times 8 dw %1*2
+%endif
+
+%if %2 != %1
+pw_%2x2: times 8 dw %2*2
+%endif
+%endmacro
+
+; Emit all idct coefficient pairs; the trailing 1 additionally requests the
+; negated/swapped tables (see VP9_IDCT_COEFFS above).
+VP9_IDCT_COEFFS 16364, 804
+VP9_IDCT_COEFFS 16305, 1606
+VP9_IDCT_COEFFS 16069, 3196, 1
+VP9_IDCT_COEFFS 15893, 3981
+VP9_IDCT_COEFFS 15137, 6270, 1
+VP9_IDCT_COEFFS 14811, 7005
+VP9_IDCT_COEFFS 14449, 7723
+VP9_IDCT_COEFFS 13160, 9760
+VP9_IDCT_COEFFS 11585, 11585, 1
+VP9_IDCT_COEFFS 11003, 12140
+VP9_IDCT_COEFFS 10394, 12665
+VP9_IDCT_COEFFS 9102, 13623, 1
+VP9_IDCT_COEFFS 8423, 14053
+VP9_IDCT_COEFFS 5520, 15426
+VP9_IDCT_COEFFS 4756, 15679
+VP9_IDCT_COEFFS 2404, 16207
+
+; Additional interleaved pairs and doubled constants around 13377 that do
+; not fit the VP9_IDCT_COEFFS pattern; same layout (4x word pairs for
+; pmaddwd, 8x doubled words for pmulhrsw). NOTE(review): presumably the
+; iadst-specific constants -- confirm against the iadst templates.
+const pw_5283_13377
+times 4 dw 5283, 13377
+const pw_9929_13377
+times 4 dw 9929, 13377
+const pw_15212_m13377
+times 4 dw 15212, -13377
+const pw_15212_9929
+times 4 dw 15212, 9929
+const pw_m5283_m15212
+times 4 dw -5283, -15212
+const pw_13377x2
+times 8 dw 13377*2
+const pw_m13377_13377
+times 4 dw -13377, 13377
+const pw_13377_0
+times 4 dw 13377, 0
+
+cextern pw_8
+cextern pw_16
+cextern pw_32
+cextern pw_512
+cextern pw_1024
+cextern pw_2048
+cextern pw_m1
+cextern pd_8192
+
+SECTION .text
+
+; Per-lane rotation with 32-bit intermediates. Words of src2/src1 are
+; interleaved as (src2, src1) pairs, then pmaddwd against the (-mul1, mul2)
+; and (mul2, mul1) tables yields:
+;   {m%1 (lo), m%3 (hi)} = src1*mul2 - src2*mul1
+;   {m%2 (lo), m%4 (hi)} = src1*mul1 + src2*mul2
+%macro VP9_UNPACK_MULSUB_2D_4X 6 ; dst1 [src1], dst2 [src2], dst3, dst4, mul1, mul2
+ punpckhwd m%4, m%2, m%1
+ punpcklwd m%2, m%1
+ pmaddwd m%3, m%4, [pw_m%5_%6]
+ pmaddwd m%4, [pw_%6_%5]
+ pmaddwd m%1, m%2, [pw_m%5_%6]
+ pmaddwd m%2, [pw_%6_%5]
+%endmacro
+
+; Dword butterfly of (%1,%2) and (%3,%4), add rounding constant %6, shift
+; each result right by 14 (the idct fixed-point scale) and pack the low/high
+; dword halves back into word vectors %1/%2. %5 is SUMSUB_BA scratch.
+%macro VP9_RND_SH_SUMSUB_BA 6 ; dst1 [src1], dst2 [src2], src3, src4, tmp, round
+ SUMSUB_BA d, %1, %2, %5
+ SUMSUB_BA d, %3, %4, %5
+ paddd m%1, %6
+ paddd m%2, %6
+ paddd m%3, %6
+ paddd m%4, %6
+ psrad m%1, 14
+ psrad m%2, 14
+ psrad m%3, 14
+ psrad m%4, 14
+ packssdw m%1, m%3
+ packssdw m%2, m%4
+%endmacro
+
+; Add two rows of word residuals (m%1/m%2) to two rows of 8-bit pixels at
+; %6 and %6+strideq, saturating to [0,255] via packuswb. m%5 must hold
+; zero; m%3/m%4 are clobbered as temporaries.
+%macro VP9_STORE_2X 5-6 dstq ; reg1, reg2, tmp1, tmp2, zero, dst
+ movh m%3, [%6]
+ movh m%4, [%6+strideq]
+ punpcklbw m%3, m%5
+ punpcklbw m%4, m%5
+ paddw m%3, m%1
+ paddw m%4, m%2
+ packuswb m%3, m%5
+ packuswb m%4, m%5
+ movh [%6], m%3
+ movh [%6+strideq], m%4
+%endmacro
+
+; Zero %3 rows (of %3 16-bit coefficients each, i.e. %3*2 bytes) starting at
+; %1 with a row pitch of %2 bytes, storing the pre-zeroed register %4.
+%macro ZERO_BLOCK 4 ; mem, stride, nnzcpl, zero_reg
+%assign %%y 0
+%rep %3
+%assign %%x 0
+%rep %3*2/mmsize
+ mova [%1+%%y+%%x], %4
+%assign %%x (%%x+mmsize)
+%endrep
+%assign %%y (%%y+%2)
+%endrep
+%endmacro
+
+;-------------------------------------------------------------------------------------------
+; void vp9_iwht_iwht_4x4_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+;-------------------------------------------------------------------------------------------
+
+INIT_MMX mmx
+; Inverse 4x4 Walsh-Hadamard transform + add (lossless mode). Coefficients
+; are pre-scaled down by 2 (psraw 2), transformed in both directions with a
+; transpose in between, added to dst, and the coefficient block is zeroed.
+cglobal vp9_iwht_iwht_4x4_add, 3, 3, 0, dst, stride, block, eob
+ mova m0, [blockq+0*8]
+ mova m1, [blockq+1*8]
+ mova m2, [blockq+2*8]
+ mova m3, [blockq+3*8]
+ psraw m0, 2
+ psraw m1, 2
+ psraw m2, 2
+ psraw m3, 2
+
+ VP9_IWHT4_1D
+ TRANSPOSE4x4W 0, 1, 2, 3, 4
+ VP9_IWHT4_1D
+
+ pxor m4, m4
+ VP9_STORE_2X 0, 1, 5, 6, 4
+ lea dstq, [dstq+strideq*2]
+ VP9_STORE_2X 2, 3, 5, 6, 4
+ ZERO_BLOCK blockq, 8, 4, m4
+ RET
+
+;-------------------------------------------------------------------------------------------
+; void vp9_idct_idct_4x4_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+;-------------------------------------------------------------------------------------------
+
+; 2x2 top left corner
+; One 4-point idct pass when only the top-left 2x2 coefficients are set.
+; Caller must preload m5=pw_11585x2, m6=pw_6270x2, m7=pw_15137x2.
+%macro VP9_IDCT4_2x2_1D 0
+ pmulhrsw m0, m5 ; m0=t1
+ mova m2, m0 ; m2=t0
+ mova m3, m1
+ pmulhrsw m1, m6 ; m1=t2
+ pmulhrsw m3, m7 ; m3=t3
+ VP9_IDCT4_1D_FINALIZE
+%endmacro
+
+; Final rounding ((x+8)>>4, done as pmulhrsw by pw_2048 on ssse3) and
+; add-to-dst of the four result rows in m0-m3. m4 must hold zero.
+%macro VP9_IDCT4_WRITEOUT 0
+%if cpuflag(ssse3)
+ mova m5, [pw_2048]
+ pmulhrsw m0, m5 ; (x*2048 + (1<<14))>>15 <=> (x+8)>>4
+ pmulhrsw m1, m5
+%else
+ mova m5, [pw_8]
+ paddw m0, m5
+ paddw m1, m5
+ psraw m0, 4
+ psraw m1, 4
+%endif
+ VP9_STORE_2X 0, 1, 6, 7, 4
+ lea dstq, [dstq+2*strideq]
+%if cpuflag(ssse3)
+ pmulhrsw m2, m5
+ pmulhrsw m3, m5
+%else
+ paddw m2, m5
+ paddw m3, m5
+ psraw m2, 4
+ psraw m3, 4
+%endif
+ VP9_STORE_2X 2, 3, 6, 7, 4
+%endmacro
+
+; 4x4 idct/idct with eob-based fast paths: DC-only, top-left 2x2 (ssse3
+; only), and the generic full transform. %1 = ISA suffix for INIT_MMX.
+%macro IDCT_4x4_FN 1
+INIT_MMX %1
+cglobal vp9_idct_idct_4x4_add, 4, 4, 0, dst, stride, block, eob
+
+%if cpuflag(ssse3)
+ cmp eobd, 4 ; 2x2 or smaller
+ jg .idctfull
+
+ cmp eobd, 1 ; faster path for when only DC is set
+ jne .idct2x2
+%else
+ cmp eobd, 1
+ jg .idctfull
+%endif
+
+; DC-only: both idct passes reduce to scaling by 11585/2^14, then (x+8)>>4
+%if cpuflag(ssse3)
+ movd m0, [blockq]
+ mova m5, [pw_11585x2]
+ pmulhrsw m0, m5
+ pmulhrsw m0, m5
+%else
+ ; scalar equivalent with explicit rounding, final shift folded in
+ DEFINE_ARGS dst, stride, block, coef
+ movsx coefd, word [blockq]
+ imul coefd, 11585
+ add coefd, 8192
+ sar coefd, 14
+ imul coefd, 11585
+ add coefd, (8 << 14) + 8192
+ sar coefd, 14 + 4
+ movd m0, coefd
+%endif
+ pshufw m0, m0, 0
+ pxor m4, m4
+ movh [blockq], m4
+%if cpuflag(ssse3)
+ pmulhrsw m0, [pw_2048] ; (x*2048 + (1<<14))>>15 <=> (x+8)>>4
+%endif
+ VP9_STORE_2X 0, 0, 6, 7, 4
+ lea dstq, [dstq+2*strideq]
+ VP9_STORE_2X 0, 0, 6, 7, 4
+ RET
+
+%if cpuflag(ssse3)
+; faster path for when only top left 2x2 block is set
+.idct2x2:
+ movd m0, [blockq+0]
+ movd m1, [blockq+8]
+ mova m5, [pw_11585x2]
+ mova m6, [pw_6270x2]
+ mova m7, [pw_15137x2]
+ VP9_IDCT4_2x2_1D
+ ; partial 2x4 transpose
+ punpcklwd m0, m1
+ punpcklwd m2, m3
+ SBUTTERFLY dq, 0, 2, 1
+ SWAP 1, 2
+ VP9_IDCT4_2x2_1D
+ pxor m4, m4 ; used for the block reset, and VP9_STORE_2X
+ movh [blockq+ 0], m4
+ movh [blockq+ 8], m4
+ VP9_IDCT4_WRITEOUT
+ RET
+%endif
+
+.idctfull: ; generic full 4x4 idct/idct
+ mova m0, [blockq+ 0]
+ mova m1, [blockq+ 8]
+ mova m2, [blockq+16]
+ mova m3, [blockq+24]
+%if cpuflag(ssse3)
+ mova m6, [pw_11585x2]
+%endif
+ mova m7, [pd_8192] ; rounding
+ VP9_IDCT4_1D
+ TRANSPOSE4x4W 0, 1, 2, 3, 4
+ VP9_IDCT4_1D
+ pxor m4, m4 ; used for the block reset, and VP9_STORE_2X
+ mova [blockq+ 0], m4
+ mova [blockq+ 8], m4
+ mova [blockq+16], m4
+ mova [blockq+24], m4
+ VP9_IDCT4_WRITEOUT
+ RET
+%endmacro
+
+; Instantiate the 4x4 idct for mmxext and ssse3.
+IDCT_4x4_FN mmxext
+IDCT_4x4_FN ssse3
+
+;-------------------------------------------------------------------------------------------
+; void vp9_iadst_iadst_4x4_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+;-------------------------------------------------------------------------------------------
+
+; Build one combined 4x4 row/column transform: %2 then %4 (IDCT4/IADST4)
+; with a transpose in between. %1/%3 name the function, %5 is the ISA.
+%macro IADST4_FN 5
+INIT_MMX %5
+cglobal vp9_%1_%3_4x4_add, 3, 3, 0, dst, stride, block, eob
+%if WIN64 && notcpuflag(ssse3)
+ WIN64_SPILL_XMM 8
+%endif
+ movdqa xmm5, [pd_8192]
+ mova m0, [blockq+ 0]
+ mova m1, [blockq+ 8]
+ mova m2, [blockq+16]
+ mova m3, [blockq+24]
+%if cpuflag(ssse3)
+ mova m6, [pw_11585x2]
+%endif
+; when either pass is an idct, it wants the pd_8192 rounder in mmx reg m7
+%ifnidn %1%3, iadstiadst
+ movdq2q m7, xmm5
+%endif
+ VP9_%2_1D
+ TRANSPOSE4x4W 0, 1, 2, 3, 4
+ VP9_%4_1D
+ pxor m4, m4 ; used for the block reset, and VP9_STORE_2X
+ mova [blockq+ 0], m4
+ mova [blockq+ 8], m4
+ mova [blockq+16], m4
+ mova [blockq+24], m4
+ VP9_IDCT4_WRITEOUT
+ RET
+%endmacro
+
+; All idct/iadst 4x4 combinations, for sse2 and ssse3.
+IADST4_FN idct, IDCT4, iadst, IADST4, sse2
+IADST4_FN iadst, IADST4, idct, IDCT4, sse2
+IADST4_FN iadst, IADST4, iadst, IADST4, sse2
+
+IADST4_FN idct, IDCT4, iadst, IADST4, ssse3
+IADST4_FN iadst, IADST4, idct, IDCT4, ssse3
+IADST4_FN iadst, IADST4, iadst, IADST4, ssse3
+
+; Spill helper: on x86-64, stash m%1 by swapping with high register %2
+; (free); on x86-32, store it to memory at %3.
+%macro SCRATCH 3
+%if ARCH_X86_64
+ SWAP %1, %2
+%else
+ mova [%3], m%1
+%endif
+%endmacro
+
+; Reverse of SCRATCH: swap back from high register %2 (x86-64) or reload
+; m%1 from memory at %3 (x86-32).
+%macro UNSCRATCH 3
+%if ARCH_X86_64
+ SWAP %1, %2
+%else
+ mova m%1, [%3]
+%endif
+%endmacro
+
+;-------------------------------------------------------------------------------------------
+; void vp9_idct_idct_8x8_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+;-------------------------------------------------------------------------------------------
+
+; Final butterfly stage of the 8-point idct: combine t0..t7 into the eight
+; output rows, reordering with SWAPs. Uses blockq+0 (x86-32) / m8 (x86-64)
+; as the extra slot via SCRATCH/UNSCRATCH.
+%macro VP9_IDCT8_1D_FINALIZE 0
+ SUMSUB_BA w, 3, 6, 5 ; m3=t0+t7, m6=t0-t7
+ SUMSUB_BA w, 1, 2, 5 ; m1=t1+t6, m2=t1-t6
+ SUMSUB_BA w, 7, 0, 5 ; m7=t2+t5, m0=t2-t5
+
+ UNSCRATCH 5, 8, blockq+ 0
+ SCRATCH 2, 8, blockq+ 0
+
+ SUMSUB_BA w, 5, 4, 2 ; m5=t3+t4, m4=t3-t4
+ SWAP 7, 6, 2
+ SWAP 3, 5, 0
+
+%if ARCH_X86_64
+ SWAP 6, 8
+%endif
+%endmacro
+
+; x86-32
+; - in: m0/m4 is in mem
+; - out: m6 is in mem
+; x86-64:
+; - everything is in registers (m0-7)
+; Full 8-point idct pass. On ssse3 the 11585 rotations collapse to pmulhrsw
+; by pw_11585x2; otherwise the exact pmaddwd form is used. Expects the
+; D_8192_REG / W_11585x2_REG aliases to be defined by the caller.
+%macro VP9_IDCT8_1D 0
+%if ARCH_X86_64
+ SWAP 0, 8
+ SWAP 4, 9
+%endif
+
+ VP9_UNPACK_MULSUB_2W_4X 5, 3, 9102, 13623, D_8192_REG, 0, 4 ; m5=t5a, m3=t6a
+ VP9_UNPACK_MULSUB_2W_4X 1, 7, 16069, 3196, D_8192_REG, 0, 4 ; m1=t4a, m7=t7a
+ SUMSUB_BA w, 5, 1, 0 ; m5=t4a+t5a (t4), m1=t4a-t5a (t5a)
+ SUMSUB_BA w, 3, 7, 0 ; m3=t7a+t6a (t7), m7=t7a-t6a (t6a)
+%if cpuflag(ssse3)
+ SUMSUB_BA w, 1, 7, 0 ; m1=t6a+t5a (t6), m7=t6a-t5a (t5)
+ pmulhrsw m1, W_11585x2_REG ; m1=t6
+ pmulhrsw m7, W_11585x2_REG ; m7=t5
+%else
+ VP9_UNPACK_MULSUB_2W_4X 7, 1, 11585, 11585, D_8192_REG, 0, 4
+%endif
+ VP9_UNPACK_MULSUB_2W_4X 2, 6, 15137, 6270, D_8192_REG, 0, 4 ; m2=t2a, m6=t3a
+
+ UNSCRATCH 0, 8, blockq+ 0 ; IN(0)
+ UNSCRATCH 4, 9, blockq+64 ; IN(4)
+ SCRATCH 5, 8, blockq+ 0
+
+%if cpuflag(ssse3)
+ SUMSUB_BA w, 4, 0, 5 ; m4=IN(0)+IN(4) m0=IN(0)-IN(4)
+ pmulhrsw m4, W_11585x2_REG ; m4=t0a
+ pmulhrsw m0, W_11585x2_REG ; m0=t1a
+%else
+ SCRATCH 7, 9, blockq+64
+ VP9_UNPACK_MULSUB_2W_4X 0, 4, 11585, 11585, D_8192_REG, 5, 7
+ UNSCRATCH 7, 9, blockq+64
+%endif
+ SUMSUB_BA w, 6, 4, 5 ; m6=t0a+t3a (t0), m4=t0a-t3a (t3)
+ SUMSUB_BA w, 2, 0, 5 ; m2=t1a+t2a (t1), m0=t1a-t2a (t2)
+
+ VP9_IDCT8_1D_FINALIZE
+%endmacro
+
+; 8-point idct pass when only the top-left 4x4 coefficients are set: every
+; rotation reduces to a single pmulhrsw against a doubled constant.
+%macro VP9_IDCT8_4x4_1D 0
+ pmulhrsw m0, W_11585x2_REG ; m0=t1a/t0a
+ pmulhrsw m6, m2, [pw_15137x2] ; m6=t3a
+ pmulhrsw m2, [pw_6270x2] ; m2=t2a
+ pmulhrsw m7, m1, [pw_16069x2] ; m7=t7a
+ pmulhrsw m1, [pw_3196x2] ; m1=t4a
+ pmulhrsw m5, m3, [pw_m9102x2] ; m5=t5a
+ pmulhrsw m3, [pw_13623x2] ; m3=t6a
+ SUMSUB_BA w, 5, 1, 4 ; m1=t4a+t5a (t4), m5=t4a-t5a (t5a)
+ SUMSUB_BA w, 3, 7, 4 ; m3=t7a+t6a (t7), m7=t7a-t6a (t6a)
+ SUMSUB_BA w, 1, 7, 4 ; m1=t6a+t5a (t6), m7=t6a-t5a (t5)
+ pmulhrsw m1, W_11585x2_REG ; m1=t6
+ pmulhrsw m7, W_11585x2_REG ; m7=t5
+ psubw m4, m0, m6 ; m4=t0a-t3a (t3)
+ paddw m6, m0 ; m6=t0a+t3a (t0)
+ SCRATCH 5, 8, blockq+ 0
+ SUMSUB_BA w, 2, 0, 5 ; m2=t1a+t2a (t1), m0=t1a-t2a (t2)
+ VP9_IDCT8_1D_FINALIZE
+%endmacro
+
+; 8-point idct pass when only the top-left 2x2 coefficients are set.
+; %1==1 selects the first (column) pass: it pre-interleaves m3/m1 for the
+; partial transpose and can reuse m1 as butterfly scratch.
+%macro VP9_IDCT8_2x2_1D 1
+ pmulhrsw m0, W_11585x2_REG ; m0=t0
+ pmulhrsw m3, m1, W_16069x2_REG ; m3=t7
+ pmulhrsw m1, W_3196x2_REG ; m1=t4
+ psubw m7, m3, m1 ; t5 = t7a - t4a
+ paddw m5, m3, m1 ; t6 = t7a + t4a
+ pmulhrsw m7, W_11585x2_REG ; m7=t5
+ pmulhrsw m5, W_11585x2_REG ; m5=t6
+ SWAP 5, 1
+ ; merged VP9_IDCT8_1D_FINALIZE to make register-sharing w/ avx easier
+ psubw m6, m0, m3 ; m6=t0-t7
+ paddw m3, m0 ; m3=t0+t7
+ psubw m2, m0, m1 ; m2=t1-t6
+ paddw m1, m0 ; m1=t1+t6
+%if %1 == 1
+ punpcklwd m3, m1
+%define SCRATCH_REG 1
+%elif ARCH_X86_32
+ mova [blockq+ 0], m2
+%define SCRATCH_REG 2
+%else
+%define SCRATCH_REG 8
+%endif
+ psubw m4, m0, m5 ; m4=t3-t4
+ paddw m5, m0 ; m5=t3+t4
+ SUMSUB_BA w, 7, 0, SCRATCH_REG ; m7=t2+t5, m0=t2-t5
+ SWAP 7, 6, 2
+ SWAP 3, 5, 0
+%undef SCRATCH_REG
+%endmacro
+
+; Round two result rows (pmulhrsw by %6 on ssse3, else add %6 and psraw by
+; %7, default 5) and add them to dst via VP9_STORE_2X; optional %8 selects
+; a destination other than dstq.
+%macro VP9_IDCT8_WRITEx2 6-8 5 ; line1, line2, tmp1, tmp2, zero, pw_1024/pw_16, shift
+%if cpuflag(ssse3)
+ pmulhrsw m%1, %6 ; (x*1024 + (1<<14))>>15 <=> (x+16)>>5
+ pmulhrsw m%2, %6
+%else
+ paddw m%1, %6
+ paddw m%2, %6
+ psraw m%1, %7
+ psraw m%2, %7
+%endif
+%if %0 <= 7
+ VP9_STORE_2X %1, %2, %3, %4, %5
+%else
+ VP9_STORE_2X %1, %2, %3, %4, %5, %8
+%endif
+%endmacro
+
+; x86-32:
+; - m6 is in mem
+; x86-64:
+; - m8 holds m6 (SWAP)
+; - m6 holds zero
+; Round and write all eight 8x8 result rows (m0-m5, m7 plus the spilled row)
+; to dst, two rows per VP9_IDCT8_WRITEx2 call. The rounding constant is kept
+; in m9 on x86-64, or read from memory on x86-32 where registers are scarce.
+%macro VP9_IDCT8_WRITEOUT 0
+%if ARCH_X86_64
+%if cpuflag(ssse3)
+ mova m9, [pw_1024]
+%else
+ mova m9, [pw_16]
+%endif
+%define ROUND_REG m9
+%else
+%if cpuflag(ssse3)
+%define ROUND_REG [pw_1024]
+%else
+%define ROUND_REG [pw_16]
+%endif
+%endif
+ SCRATCH 5, 10, blockq+16
+ SCRATCH 7, 11, blockq+32
+ VP9_IDCT8_WRITEx2 0, 1, 5, 7, 6, ROUND_REG
+ lea dstq, [dstq+2*strideq]
+ VP9_IDCT8_WRITEx2 2, 3, 5, 7, 6, ROUND_REG
+ lea dstq, [dstq+2*strideq]
+ UNSCRATCH 5, 10, blockq+16
+ UNSCRATCH 7, 11, blockq+32
+ VP9_IDCT8_WRITEx2 4, 5, 0, 1, 6, ROUND_REG
+ lea dstq, [dstq+2*strideq]
+ UNSCRATCH 5, 8, blockq+ 0 ; row spilled earlier by the 1D pass
+ VP9_IDCT8_WRITEx2 5, 7, 0, 1, 6, ROUND_REG
+
+%undef ROUND_REG
+%endmacro
+
+; Emit vp9_idct_idct_8x8_add_<opt>(dst, stride, block, eob).
+; %1 = cpu flavor (sse2/ssse3/avx), %2 = xmm register count for cglobal.
+; Dispatches on eob to progressively cheaper paths: DC-only, 2x2 corner and
+; 4x4 half-block (ssse3+ only), or the generic full 8x8 idct/idct.
+; Coefficients in block are zeroed after use, as the decoder expects.
+%macro VP9_IDCT_IDCT_8x8_ADD_XMM 2
+INIT_XMM %1
+cglobal vp9_idct_idct_8x8_add, 4, 4, %2, dst, stride, block, eob
+
+%if cpuflag(ssse3)
+%if ARCH_X86_64
+ mova m12, [pw_11585x2] ; often used
+%define W_11585x2_REG m12
+%else
+%define W_11585x2_REG [pw_11585x2]
+%endif
+
+ cmp eobd, 12 ; top left half or less
+ jg .idctfull
+
+ cmp eobd, 3 ; top left corner or less
+ jg .idcthalf
+
+ cmp eobd, 1 ; faster path for when only DC is set
+ jne .idcttopleftcorner
+%else
+ cmp eobd, 1
+ jg .idctfull
+%endif
+
+; dc-only path: the two 1D passes collapse to two multiplies by 11585/16384
+%if cpuflag(ssse3)
+ movd m0, [blockq]
+ pmulhrsw m0, W_11585x2_REG
+ pmulhrsw m0, W_11585x2_REG
+%else
+ DEFINE_ARGS dst, stride, block, coef
+ movsx coefd, word [blockq]
+ imul coefd, 11585
+ add coefd, 8192
+ sar coefd, 14
+ imul coefd, 11585
+ add coefd, (16 << 14) + 8192 ; fold the final (x+16)>>5 into this step
+ sar coefd, 14 + 5
+ movd m0, coefd
+%endif
+ SPLATW m0, m0, 0
+ pxor m4, m4
+ movd [blockq], m4 ; clear the consumed DC coefficient
+%if cpuflag(ssse3)
+ pmulhrsw m0, [pw_1024] ; (x*1024 + (1<<14))>>15 <=> (x+16)>>5
+%endif
+%rep 3
+ VP9_STORE_2X 0, 0, 6, 7, 4
+ lea dstq, [dstq+2*strideq]
+%endrep
+ VP9_STORE_2X 0, 0, 6, 7, 4
+ RET
+
+%if cpuflag(ssse3)
+; faster path for when only left corner is set (3 input: DC, right to DC, below
+; to DC). Note: also working with a 2x2 block
+.idcttopleftcorner:
+ movd m0, [blockq+0]
+ movd m1, [blockq+16]
+%if ARCH_X86_64
+ mova m10, [pw_3196x2]
+ mova m11, [pw_16069x2]
+%define W_3196x2_REG m10
+%define W_16069x2_REG m11
+%else
+%define W_3196x2_REG [pw_3196x2]
+%define W_16069x2_REG [pw_16069x2]
+%endif
+ VP9_IDCT8_2x2_1D 1
+ ; partial 2x8 transpose
+ ; punpcklwd m0, m1 already done inside idct
+ punpcklwd m2, m3
+ punpcklwd m4, m5
+ punpcklwd m6, m7
+ punpckldq m0, m2
+ punpckldq m4, m6
+ SBUTTERFLY qdq, 0, 4, 1
+ SWAP 1, 4
+ VP9_IDCT8_2x2_1D 2
+%if ARCH_X86_64
+ SWAP 6, 8
+%endif
+ pxor m6, m6 ; used for the block reset, and VP9_STORE_2X
+ VP9_IDCT8_WRITEOUT
+; zero the consumed coefficients (x86-32 spilled full vectors into blockq)
+%if ARCH_X86_64
+ movd [blockq+ 0], m6
+ movd [blockq+16], m6
+%else
+ mova [blockq+ 0], m6
+ mova [blockq+16], m6
+ mova [blockq+32], m6
+%endif
+ RET
+
+.idcthalf:
+ movh m0, [blockq + 0]
+ movh m1, [blockq +16]
+ movh m2, [blockq +32]
+ movh m3, [blockq +48]
+ VP9_IDCT8_4x4_1D
+ ; partial 4x8 transpose
+%if ARCH_X86_32
+ mova m6, [blockq+ 0] ; reload the row spilled by SCRATCH in the 1D pass
+%endif
+ punpcklwd m0, m1
+ punpcklwd m2, m3
+ punpcklwd m4, m5
+ punpcklwd m6, m7
+ SBUTTERFLY dq, 0, 2, 1
+ SBUTTERFLY dq, 4, 6, 5
+ SBUTTERFLY qdq, 0, 4, 1
+ SBUTTERFLY qdq, 2, 6, 5
+ SWAP 1, 4
+ SWAP 3, 6
+ VP9_IDCT8_4x4_1D
+%if ARCH_X86_64
+ SWAP 6, 8
+%endif
+ pxor m6, m6
+ VP9_IDCT8_WRITEOUT
+%if ARCH_X86_64
+ movh [blockq+ 0], m6
+ movh [blockq+16], m6
+ movh [blockq+32], m6
+%else
+ mova [blockq+ 0], m6
+ mova [blockq+16], m6
+ mova [blockq+32], m6
+%endif
+ movh [blockq+48], m6
+ RET
+%endif
+
+.idctfull: ; generic full 8x8 idct/idct
+%if ARCH_X86_64
+ mova m0, [blockq+ 0] ; IN(0)
+%endif
+ mova m1, [blockq+ 16] ; IN(1)
+ mova m2, [blockq+ 32] ; IN(2)
+ mova m3, [blockq+ 48] ; IN(3)
+%if ARCH_X86_64
+ mova m4, [blockq+ 64] ; IN(4)
+%endif
+ mova m5, [blockq+ 80] ; IN(5)
+ mova m6, [blockq+ 96] ; IN(6)
+ mova m7, [blockq+112] ; IN(7)
+%if ARCH_X86_64
+ mova m11, [pd_8192] ; rounding
+%define D_8192_REG m11
+%else
+%define D_8192_REG [pd_8192]
+%endif
+ VP9_IDCT8_1D
+%if ARCH_X86_64
+ TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, 8
+%else
+ TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [blockq+0], [blockq+64], 1
+ mova [blockq+0], m0
+%endif
+ VP9_IDCT8_1D
+
+%if ARCH_X86_64
+ SWAP 6, 8
+%endif
+ pxor m6, m6 ; used for the block reset, and VP9_STORE_2X
+ VP9_IDCT8_WRITEOUT
+ ZERO_BLOCK blockq, 16, 8, m6
+ RET
+%undef W_11585x2_REG
+%endmacro
+
+; instantiate the 8x8 idct/idct function for each SIMD flavor
+; (second arg = number of xmm registers the flavor needs)
+VP9_IDCT_IDCT_8x8_ADD_XMM sse2, 12
+VP9_IDCT_IDCT_8x8_ADD_XMM ssse3, 13
+VP9_IDCT_IDCT_8x8_ADD_XMM avx, 13
+
+;---------------------------------------------------------------------------------------------
+; void vp9_iadst_iadst_8x8_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+;---------------------------------------------------------------------------------------------
+
+; x86-32:
+; - in: m0/3/4/7 are in mem [blockq+N*16]
+; - out: m6 is in mem [blockq+0]
+; x86-64:
+; - everything is in registers
+; 1D 8-point IADST pass over m0-m7. On x86-32 the caller provides
+; m0/3/4/7 in memory (see calling-convention note above); on x86-64 they are
+; shuffled to m8-m11 up front so the low registers are free for temporaries.
+%macro VP9_IADST8_1D 0 ; input/output=m0/1/2/3/4/5/6/7
+%if ARCH_X86_64
+ SWAP 0, 8
+ SWAP 3, 9
+ SWAP 4, 10
+ SWAP 7, 11
+%endif
+
+ VP9_UNPACK_MULSUB_2D_4X 5, 2, 0, 3, 14449, 7723 ; m5/2=t3[d], m2/4=t2[d]
+ VP9_UNPACK_MULSUB_2D_4X 1, 6, 4, 7, 4756, 15679 ; m1/4=t7[d], m6/7=t6[d]
+ SCRATCH 4, 12, blockq+1*16
+ VP9_RND_SH_SUMSUB_BA 6, 2, 7, 3, 4, D_8192_REG ; m6=t2[w], m2=t6[w]
+ UNSCRATCH 4, 12, blockq+1*16
+ VP9_RND_SH_SUMSUB_BA 1, 5, 4, 0, 3, D_8192_REG ; m1=t3[w], m5=t7[w]
+
+ UNSCRATCH 0, 8, blockq+16*0
+ UNSCRATCH 3, 9, blockq+16*3
+ UNSCRATCH 4, 10, blockq+16*4
+ UNSCRATCH 7, 11, blockq+16*7
+ SCRATCH 1, 8, blockq+16*1
+ SCRATCH 2, 9, blockq+16*2
+ SCRATCH 5, 10, blockq+16*5
+ SCRATCH 6, 11, blockq+16*6
+
+ VP9_UNPACK_MULSUB_2D_4X 7, 0, 1, 2, 16305, 1606 ; m7/1=t1[d], m0/2=t0[d]
+ VP9_UNPACK_MULSUB_2D_4X 3, 4, 5, 6, 10394, 12665 ; m3/5=t5[d], m4/6=t4[d]
+ SCRATCH 1, 12, blockq+ 0*16
+ VP9_RND_SH_SUMSUB_BA 4, 0, 6, 2, 1, D_8192_REG ; m4=t0[w], m0=t4[w]
+ UNSCRATCH 1, 12, blockq+ 0*16
+ VP9_RND_SH_SUMSUB_BA 3, 7, 5, 1, 2, D_8192_REG ; m3=t1[w], m7=t5[w]
+
+ UNSCRATCH 2, 9, blockq+16*2
+ UNSCRATCH 5, 10, blockq+16*5
+ SCRATCH 3, 9, blockq+16*3
+ SCRATCH 4, 10, blockq+16*4
+
+ ; m4=t0, m3=t1, m6=t2, m1=t3, m0=t4, m7=t5, m2=t6, m5=t7
+
+ VP9_UNPACK_MULSUB_2D_4X 0, 7, 1, 3, 15137, 6270 ; m0/1=t5[d], m7/3=t4[d]
+ VP9_UNPACK_MULSUB_2D_4X 5, 2, 4, 6, 6270, 15137 ; m5/4=t6[d], m2/6=t7[d]
+ SCRATCH 1, 12, blockq+ 0*16
+ VP9_RND_SH_SUMSUB_BA 5, 7, 4, 3, 1, D_8192_REG
+ UNSCRATCH 1, 12, blockq+ 0*16
+ PSIGNW m5, W_M1_REG ; m5=out1[w], m7=t6[w]
+ VP9_RND_SH_SUMSUB_BA 2, 0, 6, 1, 3, D_8192_REG ; m2=out6[w], m0=t7[w]
+
+ UNSCRATCH 1, 8, blockq+16*1
+ UNSCRATCH 3, 9, blockq+16*3
+ UNSCRATCH 4, 10, blockq+16*4
+ UNSCRATCH 6, 11, blockq+16*6
+ SCRATCH 2, 8, blockq+16*0
+
+ SUMSUB_BA w, 6, 4, 2 ; m6=out0[w], m4=t2[w]
+ SUMSUB_BA w, 1, 3, 2
+ PSIGNW m1, W_M1_REG ; m1=out7[w], m3=t3[w]
+
+ ; m6=out0, m5=out1, m4=t2, m3=t3, m7=t6, m0=t7, m2=out6, m1=out7
+
+ ; unfortunately, the code below overflows in some cases
+%if 0; cpuflag(ssse3)
+ SUMSUB_BA w, 3, 4, 2
+ SUMSUB_BA w, 0, 7, 2
+ pmulhrsw m3, W_11585x2_REG
+ pmulhrsw m7, W_11585x2_REG
+ pmulhrsw m4, W_11585x2_REG ; out4
+ pmulhrsw m0, W_11585x2_REG ; out2
+%else
+ SCRATCH 5, 9, blockq+16*1
+ VP9_UNPACK_MULSUB_2W_4X 4, 3, 11585, 11585, D_8192_REG, 2, 5
+ VP9_UNPACK_MULSUB_2W_4X 7, 0, 11585, 11585, D_8192_REG, 2, 5
+ UNSCRATCH 5, 9, blockq+16*1
+%endif
+ PSIGNW m3, W_M1_REG ; out3
+ PSIGNW m7, W_M1_REG ; out5
+
+ ; m6=out0, m5=out1, m0=out2, m3=out3, m4=out4, m7=out5, m2=out6, m1=out7
+
+%if ARCH_X86_64
+ SWAP 2, 8
+%endif
+ SWAP 0, 6, 2
+ SWAP 7, 1, 5
+%endmacro
+
+; Emit vp9_<%1>_<%3>_8x8_add_<%5>(dst, stride, block, eob).
+; %1/%2 = name/macro of the first (row) transform, %3/%4 = name/macro of the
+; second (column) transform, %5 = cpu flavor, %6 = xmm register count.
+; first_is_idct / second_is_idct adapt the x86-32 load/spill pattern to the
+; differing idct8 vs. iadst8 calling conventions noted below.
+%macro IADST8_FN 6
+INIT_XMM %5
+cglobal vp9_%1_%3_8x8_add, 3, 3, %6, dst, stride, block, eob
+
+%ifidn %1, idct
+%define first_is_idct 1
+%else
+%define first_is_idct 0
+%endif
+
+%ifidn %3, idct
+%define second_is_idct 1
+%else
+%define second_is_idct 0
+%endif
+
+%if ARCH_X86_64
+ mova m0, [blockq+ 0] ; IN(0)
+%endif
+ mova m1, [blockq+ 16] ; IN(1)
+ mova m2, [blockq+ 32] ; IN(2)
+%if ARCH_X86_64 || first_is_idct
+ mova m3, [blockq+ 48] ; IN(3)
+%endif
+%if ARCH_X86_64
+ mova m4, [blockq+ 64] ; IN(4)
+%endif
+ mova m5, [blockq+ 80] ; IN(5)
+ mova m6, [blockq+ 96] ; IN(6)
+%if ARCH_X86_64 || first_is_idct
+ mova m7, [blockq+112] ; IN(7)
+%endif
+%if ARCH_X86_64
+%if cpuflag(ssse3)
+ mova m15, [pw_11585x2] ; often used
+%endif
+ mova m13, [pd_8192] ; rounding
+ mova m14, [pw_m1]
+%define W_11585x2_REG m15
+%define D_8192_REG m13
+%define W_M1_REG m14
+%else
+%define W_11585x2_REG [pw_11585x2]
+%define D_8192_REG [pd_8192]
+%define W_M1_REG [pw_m1]
+%endif
+
+ ; note different calling conventions for idct8 vs. iadst8 on x86-32
+ VP9_%2_1D
+%if ARCH_X86_64
+ TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, 8
+%else
+ TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [blockq+0], [blockq+64], 1
+ mova [blockq+ 0], m0
+%if second_is_idct == 0
+ mova [blockq+ 48], m3 ; iadst8 second pass expects m3/m7 in memory
+ mova [blockq+112], m7
+%endif
+%endif
+ VP9_%4_1D
+
+%if ARCH_X86_64
+ SWAP 6, 8
+%endif
+ pxor m6, m6 ; used for the block reset, and VP9_STORE_2X
+ VP9_IDCT8_WRITEOUT
+ ZERO_BLOCK blockq, 16, 8, m6
+ RET
+
+%undef W_11585x2_REG
+%undef first_is_idct
+%undef second_is_idct
+
+%endmacro
+
+; instantiate every idct/iadst 8x8 combination for each SIMD flavor
+IADST8_FN idct, IDCT8, iadst, IADST8, sse2, 15
+IADST8_FN iadst, IADST8, idct, IDCT8, sse2, 15
+IADST8_FN iadst, IADST8, iadst, IADST8, sse2, 15
+IADST8_FN idct, IDCT8, iadst, IADST8, ssse3, 16
+IADST8_FN idct, IDCT8, iadst, IADST8, avx, 16
+IADST8_FN iadst, IADST8, idct, IDCT8, ssse3, 16
+IADST8_FN iadst, IADST8, idct, IDCT8, avx, 16
+IADST8_FN iadst, IADST8, iadst, IADST8, ssse3, 16
+IADST8_FN iadst, IADST8, iadst, IADST8, avx, 16
+
+;---------------------------------------------------------------------------------------------
+; void vp9_idct_idct_16x16_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+;---------------------------------------------------------------------------------------------
+
+; x86-64:
+; at the end of this macro, m7 is stored in [%4+15*%5]
+; everything else (t0-6 and t8-15) is stored in m0-6 and m8-15
+; the following sumsubs have not been done yet:
+; SUMSUB_BA w, 6, 9, 15 ; t6, t9
+; SUMSUB_BA w, 7, 8, 15 ; t7, t8
+; or (x86-32) t0-t5 are in m0-m5, t10-t15 are in x11/9/7/5/3/1,
+; and the following sumsubs have not been done yet:
+; SUMSUB_BA w, x13, x14, 7 ; t6, t9
+; SUMSUB_BA w, x15, x12, 7 ; t7, t8
+
+; Shared first part of the 1D 16-point IDCT (used by both idct16 passes and
+; by the iadst16 variants via %6).
+; %1 = src pointer, %2 = nnzc bound (4/8/16 selects how many inputs are read
+; and whether single-multiply shortcuts apply), %3 = src stride,
+; %4/%5 = scratch pointer/stride, %6 = is_iadst (nonzero disables the ssse3
+; pmulhrsw shortcut, which can overflow for iadst input ranges).
+%macro VP9_IDCT16_1D_START 6 ; src, nnzc, stride, scratch, scratch_stride, is_iadst
+%if %2 <= 4
+ mova m3, [%1+ 1*%3] ; IN(1)
+ mova m0, [%1+ 3*%3] ; IN(3)
+
+ pmulhrsw m4, m3, [pw_16305x2] ; t14-15
+ pmulhrsw m3, [pw_1606x2] ; t8-9
+ pmulhrsw m7, m0, [pw_m4756x2] ; t10-11
+ pmulhrsw m0, [pw_15679x2] ; t12-13
+
+ ; m8=t0, m9=t1, m10=t2, m11=t3, m12=t4, m14=t5, m13=t6, m15=t7
+ ; m3=t8, m5=t9, m1=t10, m7=t11, m0=t12, m6=t13, m2=t14, m4=t15
+
+ VP9_UNPACK_MULSUB_2W_4X 2, 5, 4, 3, 15137, 6270, [pd_8192], 1, 6 ; t9, t14
+ SCRATCH 4, 10, %4+ 1*%5
+ SCRATCH 5, 11, %4+ 7*%5
+ VP9_UNPACK_MULSUB_2W_4X 6, 1, 0, 7, 6270, m15137, [pd_8192], 4, 5 ; t10, t13
+ UNSCRATCH 5, 11, %4+ 7*%5
+
+ ; m15=t0, m14=t1, m13=t2, m12=t3, m11=t4, m10=t5, m9=t6, m8=t7
+ ; m7=t8, m6=t9, m2=t10, m3=t11, m4=t12, m5=t13, m1=t14, m0=t15
+%else
+ mova m5, [%1+ 1*%3] ; IN(1)
+ mova m4, [%1+ 7*%3] ; IN(7)
+%if %2 <= 8
+ pmulhrsw m2, m5, [pw_16305x2] ; t15
+ pmulhrsw m5, [pw_1606x2] ; t8
+ pmulhrsw m3, m4, [pw_m10394x2] ; t9
+ pmulhrsw m4, [pw_12665x2] ; t14
+%else
+ mova m3, [%1+ 9*%3] ; IN(9)
+ mova m2, [%1+15*%3] ; IN(15)
+
+ ; m10=in0, m5=in1, m14=in2, m6=in3, m9=in4, m7=in5, m15=in6, m4=in7
+ ; m11=in8, m3=in9, m12=in10 m0=in11, m8=in12, m1=in13, m13=in14, m2=in15
+
+ VP9_UNPACK_MULSUB_2W_4X 5, 2, 16305, 1606, [pd_8192], 0, 1 ; t8, t15
+ VP9_UNPACK_MULSUB_2W_4X 3, 4, 10394, 12665, [pd_8192], 0, 1 ; t9, t14
+%endif
+
+ SUMSUB_BA w, 3, 5, 0 ; t8, t9
+ SUMSUB_BA w, 4, 2, 0 ; t15, t14
+
+ VP9_UNPACK_MULSUB_2W_4X 2, 5, 15137, 6270, [pd_8192], 0, 1 ; t9, t14
+
+ SCRATCH 4, 10, %4+ 1*%5
+ SCRATCH 5, 11, %4+ 7*%5
+
+ mova m6, [%1+ 3*%3] ; IN(3)
+ mova m7, [%1+ 5*%3] ; IN(5)
+%if %2 <= 8
+ pmulhrsw m0, m7, [pw_14449x2] ; t13
+ pmulhrsw m7, [pw_7723x2] ; t10
+ pmulhrsw m1, m6, [pw_m4756x2] ; t11
+ pmulhrsw m6, [pw_15679x2] ; t12
+%else
+ mova m0, [%1+11*%3] ; IN(11)
+ mova m1, [%1+13*%3] ; IN(13)
+
+ VP9_UNPACK_MULSUB_2W_4X 7, 0, 14449, 7723, [pd_8192], 4, 5 ; t10, t13
+ VP9_UNPACK_MULSUB_2W_4X 1, 6, 4756, 15679, [pd_8192], 4, 5 ; t11, t12
+%endif
+
+ ; m11=t0, m10=t1, m9=t2, m8=t3, m14=t4, m12=t5, m15=t6, m13=t7
+ ; m5=t8, m3=t9, m7=t10, m1=t11, m6=t12, m0=t13, m4=t14, m2=t15
+
+ SUMSUB_BA w, 7, 1, 4 ; t11, t10
+ SUMSUB_BA w, 0, 6, 4 ; t12, t13
+
+ ; m8=t0, m9=t1, m10=t2, m11=t3, m12=t4, m14=t5, m13=t6, m15=t7
+ ; m3=t8, m5=t9, m1=t10, m7=t11, m0=t12, m6=t13, m2=t14, m4=t15
+
+ VP9_UNPACK_MULSUB_2W_4X 6, 1, 6270, m15137, [pd_8192], 4, 5 ; t10, t13
+
+ UNSCRATCH 5, 11, %4+ 7*%5
+%endif
+
+ ; m8=t0, m9=t1, m10=t2, m11=t3, m12=t4, m13=t5, m14=t6, m15=t7
+ ; m3=t8, m2=t9, m6=t10, m7=t11, m0=t12, m1=t13, m5=t14, m4=t15
+
+ SUMSUB_BA w, 7, 3, 4 ; t8, t11
+
+ ; backup first register
+ mova [%4+15*%5], m7
+
+ SUMSUB_BA w, 6, 2, 7 ; t9, t10
+ UNSCRATCH 4, 10, %4+ 1*%5
+ SUMSUB_BA w, 0, 4, 7 ; t15, t12
+ SUMSUB_BA w, 1, 5, 7 ; t14, t13
+
+ ; m15=t0, m14=t1, m13=t2, m12=t3, m11=t4, m10=t5, m9=t6, m8=t7
+ ; m7=t8, m6=t9, m2=t10, m3=t11, m4=t12, m5=t13, m1=t14, m0=t15
+
+; ssse3 shortcut only safe for idct input ranges (%6 == 0); see iadst note
+%if cpuflag(ssse3) && %6 == 0
+ SUMSUB_BA w, 2, 5, 7
+ SUMSUB_BA w, 3, 4, 7
+ pmulhrsw m5, [pw_11585x2] ; t10
+ pmulhrsw m4, [pw_11585x2] ; t11
+ pmulhrsw m3, [pw_11585x2] ; t12
+ pmulhrsw m2, [pw_11585x2] ; t13
+%else
+ SCRATCH 6, 10, %4+ 1*%5
+ VP9_UNPACK_MULSUB_2W_4X 5, 2, 11585, 11585, [pd_8192], 6, 7 ; t10, t13
+ VP9_UNPACK_MULSUB_2W_4X 4, 3, 11585, 11585, [pd_8192], 6, 7 ; t11, t12
+ UNSCRATCH 6, 10, %4+ 1*%5
+%endif
+
+ ; m15=t0, m14=t1, m13=t2, m12=t3, m11=t4, m10=t5, m9=t6, m8=t7
+ ; m7=t8, m6=t9, m5=t10, m4=t11, m3=t12, m2=t13, m1=t14, m0=t15
+
+ SCRATCH 0, 8, %4+ 1*%5
+ SCRATCH 1, 9, %4+ 3*%5
+ SCRATCH 2, 10, %4+ 5*%5
+ SCRATCH 3, 11, %4+ 7*%5
+ SCRATCH 4, 12, %4+ 9*%5
+ SCRATCH 5, 13, %4+11*%5
+ SCRATCH 6, 14, %4+13*%5
+
+ ; even (tx8x8)
+%if %2 <= 4
+ mova m3, [%1+ 0*%3] ; IN(0)
+ mova m4, [%1+ 2*%3] ; IN(2)
+
+ pmulhrsw m3, [pw_11585x2] ; t0-t3
+ pmulhrsw m7, m4, [pw_16069x2] ; t6-7
+ pmulhrsw m4, [pw_3196x2] ; t4-5
+
+%if 0 ; overflows :(
+ paddw m6, m7, m4
+ psubw m5, m7, m4
+ pmulhrsw m5, [pw_11585x2] ; t5
+ pmulhrsw m6, [pw_11585x2] ; t6
+%else
+ VP9_UNPACK_MULSUB_2W_4X 5, 6, 7, 4, 11585, 11585, [pd_8192], 0, 1 ; t5, t6
+%endif
+
+ psubw m0, m3, m7
+ paddw m7, m3
+ psubw m1, m3, m6
+ paddw m6, m3
+ psubw m2, m3, m5
+ paddw m5, m3
+
+%if ARCH_X86_32
+ SWAP 0, 7
+%endif
+ SCRATCH 7, 15, %4+12*%5
+%else
+ mova m6, [%1+ 2*%3] ; IN(2)
+ mova m1, [%1+ 4*%3] ; IN(4)
+ mova m7, [%1+ 6*%3] ; IN(6)
+%if %2 <= 8
+ pmulhrsw m0, m1, [pw_15137x2] ; t3
+ pmulhrsw m1, [pw_6270x2] ; t2
+ pmulhrsw m5, m6, [pw_16069x2] ; t7
+ pmulhrsw m6, [pw_3196x2] ; t4
+ pmulhrsw m4, m7, [pw_m9102x2] ; t5
+ pmulhrsw m7, [pw_13623x2] ; t6
+%else
+ mova m4, [%1+10*%3] ; IN(10)
+ mova m0, [%1+12*%3] ; IN(12)
+ mova m5, [%1+14*%3] ; IN(14)
+
+ VP9_UNPACK_MULSUB_2W_4X 1, 0, 15137, 6270, [pd_8192], 2, 3 ; t2, t3
+ VP9_UNPACK_MULSUB_2W_4X 6, 5, 16069, 3196, [pd_8192], 2, 3 ; t4, t7
+ VP9_UNPACK_MULSUB_2W_4X 4, 7, 9102, 13623, [pd_8192], 2, 3 ; t5, t6
+%endif
+
+ SUMSUB_BA w, 4, 6, 2 ; t4, t5
+ SUMSUB_BA w, 7, 5, 2 ; t7, t6
+
+%if cpuflag(ssse3) && %6 == 0
+ SUMSUB_BA w, 6, 5, 2
+ pmulhrsw m5, [pw_11585x2] ; t5
+ pmulhrsw m6, [pw_11585x2] ; t6
+%else
+ VP9_UNPACK_MULSUB_2W_4X 5, 6, 11585, 11585, [pd_8192], 2, 3 ; t5, t6
+%endif
+
+ SCRATCH 5, 15, %4+10*%5
+ mova m2, [%1+ 0*%3] ; IN(0)
+%if %2 <= 8
+ pmulhrsw m2, [pw_11585x2] ; t0 and t1
+ psubw m3, m2, m0
+ paddw m0, m2
+
+ SUMSUB_BA w, 7, 0, 5 ; t0, t7
+%else
+ mova m3, [%1+ 8*%3] ; IN(8)
+
+ ; from 3 stages back
+%if cpuflag(ssse3) && %6 == 0
+ SUMSUB_BA w, 3, 2, 5
+ pmulhrsw m3, [pw_11585x2] ; t0
+ pmulhrsw m2, [pw_11585x2] ; t1
+%else
+ mova [%1+ 0*%3], m0
+ VP9_UNPACK_MULSUB_2W_4X 2, 3, 11585, 11585, [pd_8192], 5, 0 ; t0, t1
+ mova m0, [%1+ 0*%3]
+%endif
+
+ ; from 2 stages back
+ SUMSUB_BA w, 0, 3, 5 ; t0, t3
+
+ SUMSUB_BA w, 7, 0, 5 ; t0, t7
+%endif
+ UNSCRATCH 5, 15, %4+10*%5
+%if ARCH_X86_32
+ SWAP 0, 7
+%endif
+ SCRATCH 7, 15, %4+12*%5
+ SUMSUB_BA w, 1, 2, 7 ; t1, t2
+
+ ; from 1 stage back
+ SUMSUB_BA w, 6, 1, 7 ; t1, t6
+ SUMSUB_BA w, 5, 2, 7 ; t2, t5
+%endif
+ SUMSUB_BA w, 4, 3, 7 ; t3, t4
+
+%if ARCH_X86_64
+ SWAP 0, 8
+ SWAP 1, 9
+ SWAP 2, 10
+ SWAP 3, 11
+ SWAP 4, 12
+ SWAP 5, 13
+ SWAP 6, 14
+
+ SUMSUB_BA w, 0, 15, 7 ; t0, t15
+ SUMSUB_BA w, 1, 14, 7 ; t1, t14
+ SUMSUB_BA w, 2, 13, 7 ; t2, t13
+ SUMSUB_BA w, 3, 12, 7 ; t3, t12
+ SUMSUB_BA w, 4, 11, 7 ; t4, t11
+ SUMSUB_BA w, 5, 10, 7 ; t5, t10
+%else
+ SWAP 1, 6
+ SWAP 2, 5
+ SWAP 3, 4
+ mova [%4+14*%5], m6
+
+; x86-32 helper: sumsub one register against a value kept in scratch memory
+%macro %%SUMSUB_BA_STORE 5 ; reg, from_mem, to_mem, scratch, scratch_stride
+ mova m6, [%4+%2*%5]
+ SUMSUB_BA w, 6, %1, 7
+ SWAP %1, 6
+ mova [%4+%3*%5], m6
+%endmacro
+
+ %%SUMSUB_BA_STORE 0, 1, 1, %4, %5 ; t0, t15
+ %%SUMSUB_BA_STORE 1, 3, 3, %4, %5 ; t1, t14
+ %%SUMSUB_BA_STORE 2, 5, 5, %4, %5 ; t2, t13
+ %%SUMSUB_BA_STORE 3, 7, 7, %4, %5 ; t3, t12
+ %%SUMSUB_BA_STORE 4, 9, 9, %4, %5 ; t4, t11
+ %%SUMSUB_BA_STORE 5, 11, 11, %4, %5 ; t5, t10
+%endif
+%endmacro
+
+; Full 1D 16-point IDCT pass built on VP9_IDCT16_1D_START.
+; %1 = src, %2 = pass (1: transform + transpose into tmpq; 2: transform,
+; round by >>6 and add into dst), %3 = nnzc bound, %4 = is_iadst.
+; Pass 1 stores the 16x16 intermediate interleaved in tmpq; pass 2 finishes
+; the two deferred sumsubs (t6/t9, t7/t8) noted above before writing out.
+%macro VP9_IDCT16_1D 2-4 16, 1 ; src, pass, nnzc, is_iadst
+%if %2 == 1
+ VP9_IDCT16_1D_START %1, %3, 32, tmpq, 16, %4
+
+%if ARCH_X86_64
+ ; backup a different register
+ mova m7, [tmpq+15*16]
+ mova [tmpq+ 1*16], m15
+
+ SUMSUB_BA w, 6, 9, 15 ; t6, t9
+ SUMSUB_BA w, 7, 8, 15 ; t7, t8
+
+ TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, 15
+ mova [tmpq+ 0], m0
+ mova [tmpq+ 32], m1
+ mova [tmpq+ 64], m2
+ mova [tmpq+ 96], m3
+ mova [tmpq+128], m4
+ mova [tmpq+160], m5
+ mova [tmpq+192], m6
+ mova [tmpq+224], m7
+
+ mova m15, [tmpq+ 1*16]
+ TRANSPOSE8x8W 8, 9, 10, 11, 12, 13, 14, 15, 0
+ mova [tmpq+ 16], m8
+ mova [tmpq+ 48], m9
+ mova [tmpq+ 80], m10
+ mova [tmpq+112], m11
+ mova [tmpq+144], m12
+ mova [tmpq+176], m13
+ mova [tmpq+208], m14
+ mova [tmpq+240], m15
+%else
+ mova m6, [tmpq+13*16]
+ mova m7, [tmpq+14*16]
+ SUMSUB_BA w, 6, 7 ; t6, t9
+ mova [tmpq+14*16], m6
+ mova [tmpq+13*16], m7
+ mova m7, [tmpq+15*16]
+ mova m6, [tmpq+12*16]
+ SUMSUB_BA w, 7, 6 ; t7, t8
+ mova [tmpq+15*16], m6
+
+ TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [tmpq+14*16], [tmpq+ 8*16], 1
+ mova [tmpq+ 0*16], m0
+ mova [tmpq+ 2*16], m1
+ mova [tmpq+ 4*16], m2
+ mova [tmpq+ 6*16], m3
+ mova [tmpq+10*16], m5
+ mova [tmpq+12*16], m6
+ mova [tmpq+14*16], m7
+
+ mova m0, [tmpq+15*16]
+ mova m1, [tmpq+13*16]
+ mova m2, [tmpq+11*16]
+ mova m3, [tmpq+ 9*16]
+ mova m4, [tmpq+ 7*16]
+ mova m5, [tmpq+ 5*16]
+ mova m7, [tmpq+ 1*16]
+ TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [tmpq+ 3*16], [tmpq+ 9*16], 1
+ mova [tmpq+ 1*16], m0
+ mova [tmpq+ 3*16], m1
+ mova [tmpq+ 5*16], m2
+ mova [tmpq+ 7*16], m3
+ mova [tmpq+11*16], m5
+ mova [tmpq+13*16], m6
+ mova [tmpq+15*16], m7
+%endif
+%else ; %2 == 2
+ VP9_IDCT16_1D_START %1, %3, 32, %1, 32, %4
+
+%if cpuflag(ssse3)
+%define ROUND_REG [pw_512]
+%else
+%define ROUND_REG [pw_32]
+%endif
+
+ pxor m7, m7
+%if ARCH_X86_64
+ ; backup more registers
+ mova [%1+ 2*32], m8
+ mova [%1+ 3*32], m9
+
+ VP9_IDCT8_WRITEx2 0, 1, 8, 9, 7, ROUND_REG, 6
+ lea dstq, [dstq+strideq*2]
+ VP9_IDCT8_WRITEx2 2, 3, 8, 9, 7, ROUND_REG, 6
+ lea dstq, [dstq+strideq*2]
+ VP9_IDCT8_WRITEx2 4, 5, 8, 9, 7, ROUND_REG, 6
+ lea dstq, [dstq+strideq*2]
+
+ ; restore from cache
+ SWAP 0, 7 ; move zero from m7 to m0
+ mova m7, [%1+15*32]
+ mova m8, [%1+ 2*32]
+ mova m9, [%1+ 3*32]
+
+ SUMSUB_BA w, 6, 9, 3 ; t6, t9
+ SUMSUB_BA w, 7, 8, 3 ; t7, t8
+
+ VP9_IDCT8_WRITEx2 6, 7, 3, 4, 0, ROUND_REG, 6
+ lea dstq, [dstq+strideq*2]
+ VP9_IDCT8_WRITEx2 8, 9, 3, 4, 0, ROUND_REG, 6
+ lea dstq, [dstq+strideq*2]
+ VP9_IDCT8_WRITEx2 10, 11, 1, 2, 0, ROUND_REG, 6
+ lea dstq, [dstq+strideq*2]
+ VP9_IDCT8_WRITEx2 12, 13, 1, 2, 0, ROUND_REG, 6
+ lea dstq, [dstq+strideq*2]
+ VP9_IDCT8_WRITEx2 14, 15, 1, 2, 0, ROUND_REG, 6
+%else
+ mova [tmpq+ 0*32], m5
+
+ VP9_IDCT8_WRITEx2 0, 1, 5, 6, 7, ROUND_REG, 6
+ lea dstq, [dstq+strideq*2]
+ VP9_IDCT8_WRITEx2 2, 3, 5, 6, 7, ROUND_REG, 6
+ lea dstq, [dstq+strideq*2]
+
+ SWAP 0, 7 ; move zero from m7 to m0
+ mova m5, [tmpq+ 0*32]
+
+ VP9_IDCT8_WRITEx2 4, 5, 1, 2, 0, ROUND_REG, 6
+ lea dstq, [dstq+strideq*2]
+
+ mova m4, [tmpq+13*32]
+ mova m7, [tmpq+14*32]
+ mova m5, [tmpq+15*32]
+ mova m6, [tmpq+12*32]
+ SUMSUB_BADC w, 4, 7, 5, 6, 1
+
+ VP9_IDCT8_WRITEx2 4, 5, 1, 2, 0, ROUND_REG, 6
+ lea dstq, [dstq+strideq*2]
+ VP9_IDCT8_WRITEx2 6, 7, 1, 2, 0, ROUND_REG, 6
+ lea dstq, [dstq+strideq*2]
+
+ mova m4, [tmpq+11*32]
+ mova m5, [tmpq+ 9*32]
+ mova m6, [tmpq+ 7*32]
+ mova m7, [tmpq+ 5*32]
+
+ VP9_IDCT8_WRITEx2 4, 5, 1, 2, 0, ROUND_REG, 6
+ lea dstq, [dstq+strideq*2]
+ VP9_IDCT8_WRITEx2 6, 7, 1, 2, 0, ROUND_REG, 6
+ lea dstq, [dstq+strideq*2]
+
+ mova m4, [tmpq+ 3*32]
+ mova m5, [tmpq+ 1*32]
+
+ VP9_IDCT8_WRITEx2 4, 5, 1, 2, 0, ROUND_REG, 6
+ lea dstq, [dstq+strideq*2]
+%endif
+
+%undef ROUND_REG
+%endif ; %2 == 1/2
+%endmacro
+
+; Add a splatted word value (%1, e.g. the rounded DC) to two full-width
+; destination rows: unpack bytes to words against the zero register (%6),
+; add with saturation via packuswb, and store both rows.
+%macro VP9_STORE_2XFULL 6-7 strideq; dc, tmp1, tmp2, tmp3, tmp4, zero, stride
+ mova m%3, [dstq]
+ mova m%5, [dstq+%7]
+ punpcklbw m%2, m%3, m%6
+ punpckhbw m%3, m%6
+ punpcklbw m%4, m%5, m%6
+ punpckhbw m%5, m%6
+ paddw m%2, m%1
+ paddw m%3, m%1
+ paddw m%4, m%1
+ paddw m%5, m%1
+ packuswb m%2, m%3
+ packuswb m%4, m%5
+ mova [dstq], m%2
+ mova [dstq+%7], m%4
+%endmacro
+
+; Emit vp9_idct_idct_16x16_add_<opt>(dst, stride, block, eob).
+; %1 = cpu flavor. Dispatches on eob: DC-only, 8x8-subset (ssse3+ only,
+; eob <= 38), or the full 16x16. Uses 512 bytes of stack (tmpq) for the
+; pass-1 intermediate; column passes run twice over 8-column halves.
+%macro VP9_IDCT_IDCT_16x16_ADD_XMM 1
+INIT_XMM %1
+cglobal vp9_idct_idct_16x16_add, 4, 6, 16, 512, dst, stride, block, eob
+%if cpuflag(ssse3)
+ ; 2x2=eob=3, 4x4=eob=10
+ cmp eobd, 38
+ jg .idctfull
+ cmp eobd, 1 ; faster path for when only DC is set
+ jne .idct8x8
+%else
+ cmp eobd, 1 ; faster path for when only DC is set
+ jg .idctfull
+%endif
+
+ ; dc-only
+%if cpuflag(ssse3)
+ movd m0, [blockq]
+ mova m1, [pw_11585x2]
+ pmulhrsw m0, m1
+ pmulhrsw m0, m1
+%else
+ DEFINE_ARGS dst, stride, block, coef
+ movsx coefd, word [blockq]
+ imul coefd, 11585
+ add coefd, 8192
+ sar coefd, 14
+ imul coefd, 11585
+ add coefd, (32 << 14) + 8192 ; fold the final (x+32)>>6 into this step
+ sar coefd, 14 + 6
+ movd m0, coefd
+%endif
+ SPLATW m0, m0, q0000
+%if cpuflag(ssse3)
+ pmulhrsw m0, [pw_512] ; (x*512 + (1<<14))>>15 <=> (x+32)>>6
+%endif
+ pxor m5, m5
+ movd [blockq], m5 ; clear the consumed DC coefficient
+%rep 7
+ VP9_STORE_2XFULL 0, 1, 2, 3, 4, 5
+ lea dstq, [dstq+2*strideq]
+%endrep
+ VP9_STORE_2XFULL 0, 1, 2, 3, 4, 5
+ RET
+
+ DEFINE_ARGS dst, stride, block, cnt, dst_bak, tmp
+%if cpuflag(ssse3)
+.idct8x8:
+ mov tmpq, rsp
+ VP9_IDCT16_1D blockq, 1, 8, 0
+
+ mov cntd, 2
+ mov dst_bakq, dstq
+.loop2_8x8:
+ VP9_IDCT16_1D tmpq, 2, 8, 0
+ lea dstq, [dst_bakq+8]
+ add tmpq, 16
+ dec cntd
+ jg .loop2_8x8
+
+ ; at the end of the loop, m0 should still be zero
+ ; use that to zero out block coefficients
+ ZERO_BLOCK blockq, 32, 8, m0
+ RET
+%endif
+
+.idctfull:
+ mov cntd, 2
+ mov tmpq, rsp
+.loop1_full:
+ VP9_IDCT16_1D blockq, 1, 16, 0
+ add blockq, 16
+ add tmpq, 256
+ dec cntd
+ jg .loop1_full
+ sub blockq, 32
+
+ mov cntd, 2
+ mov tmpq, rsp
+ mov dst_bakq, dstq
+.loop2_full:
+ VP9_IDCT16_1D tmpq, 2, 16, 0
+ lea dstq, [dst_bakq+8]
+ add tmpq, 16
+ dec cntd
+ jg .loop2_full
+
+ ; at the end of the loop, m0 should still be zero
+ ; use that to zero out block coefficients
+ ZERO_BLOCK blockq, 32, 16, m0
+ RET
+%endmacro
+
+; instantiate the 16x16 idct/idct function for each SIMD flavor
+VP9_IDCT_IDCT_16x16_ADD_XMM sse2
+VP9_IDCT_IDCT_16x16_ADD_XMM ssse3
+VP9_IDCT_IDCT_16x16_ADD_XMM avx
+
+;---------------------------------------------------------------------------------------------
+; void vp9_iadst_iadst_16x16_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+;---------------------------------------------------------------------------------------------
+
+%macro VP9_IADST16_1D 2 ; src, pass
+%assign %%str 16*%2
+ mova m0, [%1+ 0*32] ; in0
+ mova m1, [%1+15*32] ; in15
+ mova m2, [%1+ 7*32] ; in7
+ mova m3, [%1+ 8*32] ; in8
+
+ VP9_UNPACK_MULSUB_2D_4X 1, 0, 4, 5, 16364, 804 ; m1/4=t1[d], m0/5=t0[d]
+ VP9_UNPACK_MULSUB_2D_4X 2, 3, 7, 6, 11003, 12140 ; m2/7=t9[d], m3/6=t8[d]
+ SCRATCH 4, 8, tmpq+ 0*%%str
+ VP9_RND_SH_SUMSUB_BA 3, 0, 6, 5, 4, [pd_8192] ; m3=t0[w], m0=t8[w]
+ UNSCRATCH 4, 8, tmpq+ 0*%%str
+ VP9_RND_SH_SUMSUB_BA 2, 1, 7, 4, 5, [pd_8192] ; m2=t1[w], m1=t9[w]
+
+ SCRATCH 0, 10, tmpq+ 0*%%str
+ SCRATCH 1, 11, tmpq+15*%%str
+ mova [tmpq+ 7*%%str], m2
+ mova [tmpq+ 8*%%str], m3
+
+ mova m1, [%1+ 2*32] ; in2
+ mova m0, [%1+13*32] ; in13
+ mova m3, [%1+ 5*32] ; in5
+ mova m2, [%1+10*32] ; in10
+
+ VP9_UNPACK_MULSUB_2D_4X 0, 1, 6, 7, 15893, 3981 ; m0/6=t3[d], m1/7=t2[d]
+ VP9_UNPACK_MULSUB_2D_4X 3, 2, 4, 5, 8423, 14053 ; m3/4=t11[d], m2/5=t10[d]
+ SCRATCH 4, 12, tmpq+ 2*%%str
+ VP9_RND_SH_SUMSUB_BA 2, 1, 5, 7, 4, [pd_8192] ; m2=t2[w], m1=t10[w]
+ UNSCRATCH 4, 12, tmpq+ 2*%%str
+ VP9_RND_SH_SUMSUB_BA 3, 0, 4, 6, 5, [pd_8192] ; m3=t3[w], m0=t11[w]
+
+ SCRATCH 0, 12, tmpq+ 2*%%str
+ SCRATCH 1, 13, tmpq+13*%%str
+ mova [tmpq+ 5*%%str], m2
+ mova [tmpq+10*%%str], m3
+
+ mova m2, [%1+ 4*32] ; in4
+ mova m3, [%1+11*32] ; in11
+ mova m0, [%1+ 3*32] ; in3
+ mova m1, [%1+12*32] ; in12
+
+ VP9_UNPACK_MULSUB_2D_4X 3, 2, 7, 6, 14811, 7005 ; m3/7=t5[d], m2/6=t4[d]
+ VP9_UNPACK_MULSUB_2D_4X 0, 1, 4, 5, 5520, 15426 ; m0/4=t13[d], m1/5=t12[d]
+ SCRATCH 4, 9, tmpq+ 4*%%str
+ VP9_RND_SH_SUMSUB_BA 1, 2, 5, 6, 4, [pd_8192] ; m1=t4[w], m2=t12[w]
+ UNSCRATCH 4, 9, tmpq+ 4*%%str
+ VP9_RND_SH_SUMSUB_BA 0, 3, 4, 7, 6, [pd_8192] ; m0=t5[w], m3=t13[w]
+
+ SCRATCH 0, 8, tmpq+ 4*%%str
+ mova [tmpq+11*%%str], m1 ; t4:m1->r11
+ UNSCRATCH 0, 10, tmpq+ 0*%%str
+ UNSCRATCH 1, 11, tmpq+15*%%str
+
+ ; round 2 interleaved part 1
+ VP9_UNPACK_MULSUB_2D_4X 0, 1, 6, 7, 16069, 3196 ; m1/7=t8[d], m0/6=t9[d]
+ VP9_UNPACK_MULSUB_2D_4X 3, 2, 5, 4, 3196, 16069 ; m3/5=t12[d], m2/4=t13[d]
+ SCRATCH 4, 9, tmpq+ 3*%%str
+ VP9_RND_SH_SUMSUB_BA 3, 1, 5, 7, 4, [pd_8192] ; m3=t8[w], m1=t12[w]
+ UNSCRATCH 4, 9, tmpq+ 3*%%str
+ VP9_RND_SH_SUMSUB_BA 2, 0, 4, 6, 5, [pd_8192] ; m2=t9[w], m0=t13[w]
+
+ SCRATCH 0, 10, tmpq+ 0*%%str
+ SCRATCH 1, 11, tmpq+15*%%str
+ SCRATCH 2, 14, tmpq+ 3*%%str
+ SCRATCH 3, 15, tmpq+12*%%str
+
+ mova m2, [%1+ 6*32] ; in6
+ mova m3, [%1+ 9*32] ; in9
+ mova m0, [%1+ 1*32] ; in1
+ mova m1, [%1+14*32] ; in14
+
+ VP9_UNPACK_MULSUB_2D_4X 3, 2, 7, 6, 13160, 9760 ; m3/7=t7[d], m2/6=t6[d]
+ VP9_UNPACK_MULSUB_2D_4X 0, 1, 4, 5, 2404, 16207 ; m0/4=t15[d], m1/5=t14[d]
+ SCRATCH 4, 9, tmpq+ 6*%%str
+ VP9_RND_SH_SUMSUB_BA 1, 2, 5, 6, 4, [pd_8192] ; m1=t6[w], m2=t14[w]
+ UNSCRATCH 4, 9, tmpq+ 6*%%str
+ VP9_RND_SH_SUMSUB_BA 0, 3, 4, 7, 6, [pd_8192] ; m0=t7[w], m3=t15[w]
+
+ ; r8=t0, r7=t1, r5=t2, r10=t3, r11=t4, m8|r4=t5, m1=t6, m0=t7
+ ; m10|r0=t8, m11|r15=t9, m13|r13=t10, m12|r2=t11, m14|r3=t12, m15|r12=t13, m2=t14, m3=t15
+
+ UNSCRATCH 4, 12, tmpq+ 2*%%str
+ UNSCRATCH 5, 13, tmpq+13*%%str
+ SCRATCH 0, 12, tmpq+ 1*%%str
+ SCRATCH 1, 13, tmpq+14*%%str
+
+ ; remainder of round 2 (rest of t8-15)
+ VP9_UNPACK_MULSUB_2D_4X 5, 4, 6, 7, 9102, 13623 ; m5/6=t11[d], m4/7=t10[d]
+ VP9_UNPACK_MULSUB_2D_4X 3, 2, 1, 0, 13623, 9102 ; m3/1=t14[d], m2/0=t15[d]
+ SCRATCH 0, 9, tmpq+ 6*%%str
+ VP9_RND_SH_SUMSUB_BA 3, 4, 1, 7, 0, [pd_8192] ; m3=t10[w], m4=t14[w]
+ UNSCRATCH 0, 9, tmpq+ 6*%%str
+ VP9_RND_SH_SUMSUB_BA 2, 5, 0, 6, 1, [pd_8192] ; m2=t11[w], m5=t15[w]
+
+ ; m15|r12=t8, m14|r3=t9, m3=t10, m2=t11, m11|r15=t12, m10|r0=t13, m4=t14, m5=t15
+
+ UNSCRATCH 6, 14, tmpq+ 3*%%str
+ UNSCRATCH 7, 15, tmpq+12*%%str
+
+ SUMSUB_BA w, 3, 7, 1
+ PSIGNW m3, [pw_m1] ; m3=out1[w], m7=t10[w]
+ SUMSUB_BA w, 2, 6, 1 ; m2=out14[w], m6=t11[w]
+
+ ; unfortunately, the code below overflows in some cases, e.g.
+ ; http://downloads.webmproject.org/test_data/libvpx/vp90-2-14-resize-fp-tiles-16-8.webm
+%if 0; cpuflag(ssse3)
+ SUMSUB_BA w, 7, 6, 1
+ pmulhrsw m7, [pw_11585x2] ; m7=out6[w]
+ pmulhrsw m6, [pw_11585x2] ; m6=out9[w]
+%else
+ VP9_UNPACK_MULSUB_2W_4X 6, 7, 11585, 11585, [pd_8192], 1, 0
+%endif
+
+ mova [tmpq+ 3*%%str], m6
+ mova [tmpq+ 6*%%str], m7
+ UNSCRATCH 6, 10, tmpq+ 0*%%str
+ UNSCRATCH 7, 11, tmpq+15*%%str
+ mova [tmpq+13*%%str], m2
+ SCRATCH 3, 11, tmpq+ 9*%%str
+
+ VP9_UNPACK_MULSUB_2D_4X 7, 6, 2, 3, 15137, 6270 ; m6/3=t13[d], m7/2=t12[d]
+ VP9_UNPACK_MULSUB_2D_4X 5, 4, 1, 0, 6270, 15137 ; m5/1=t14[d], m4/0=t15[d]
+ SCRATCH 0, 9, tmpq+ 2*%%str
+ VP9_RND_SH_SUMSUB_BA 5, 6, 1, 3, 0, [pd_8192] ; m5=out2[w], m6=t14[w]
+ UNSCRATCH 0, 9, tmpq+ 2*%%str
+ VP9_RND_SH_SUMSUB_BA 4, 7, 0, 2, 1, [pd_8192]
+ PSIGNW m4, [pw_m1] ; m4=out13[w], m7=t15[w]
+
+ ; unfortunately, the code below overflows in some cases
+%if 0; cpuflag(ssse3)
+ SUMSUB_BA w, 7, 6, 1
+ pmulhrsw m7, [pw_m11585x2] ; m7=out5[w]
+ pmulhrsw m6, [pw_11585x2] ; m6=out10[w]
+%else
+ PSIGNW m7, [pw_m1]
+ VP9_UNPACK_MULSUB_2W_4X 7, 6, 11585, 11585, [pd_8192], 1, 0
+%endif
+
+ ; m11|r13=out1, m5=out2, m7=out5, r15=out6, r3=out9, m6=out10, m4=out13, r2=out14
+
+ mova m2, [tmpq+ 8*%%str]
+ mova m3, [tmpq+ 7*%%str]
+ mova m1, [tmpq+11*%%str]
+ mova [tmpq+ 7*%%str], m6
+ mova [tmpq+11*%%str], m4
+ mova m4, [tmpq+ 5*%%str]
+ SCRATCH 5, 14, tmpq+ 5*%%str
+ SCRATCH 7, 15, tmpq+ 8*%%str
+ UNSCRATCH 6, 8, tmpq+ 4*%%str
+ UNSCRATCH 5, 12, tmpq+ 1*%%str
+ UNSCRATCH 7, 13, tmpq+14*%%str
+
+ ; m2=t0, m3=t1, m9=t2, m0=t3, m1=t4, m8=t5, m13=t6, m12=t7
+ ; m11|r13=out1, m5=out2, m7=out5, r15=out6, r3=out9, r10=out10, r11=out13, r2=out14
+
+ SUMSUB_BA w, 1, 2, 0 ; m1=t0[w], m2=t4[w]
+ mova m0, [tmpq+10*%%str]
+ SCRATCH 1, 12, tmpq+ 1*%%str
+ SUMSUB_BA w, 6, 3, 1 ; m8=t1[w], m3=t5[w]
+ SCRATCH 6, 13, tmpq+ 4*%%str
+ SUMSUB_BA w, 7, 4, 1 ; m13=t2[w], m9=t6[w]
+ SCRATCH 7, 8, tmpq+10*%%str
+ SUMSUB_BA w, 5, 0, 1 ; m12=t3[w], m0=t7[w]
+ SCRATCH 5, 9, tmpq+14*%%str
+
+ VP9_UNPACK_MULSUB_2D_4X 2, 3, 7, 5, 15137, 6270 ; m2/6=t5[d], m3/10=t4[d]
+ VP9_UNPACK_MULSUB_2D_4X 0, 4, 1, 6, 6270, 15137 ; m0/14=t6[d], m9/15=t7[d]
+ SCRATCH 6, 10, tmpq+ 0*%%str
+ VP9_RND_SH_SUMSUB_BA 0, 3, 1, 5, 6, [pd_8192]
+ UNSCRATCH 6, 10, tmpq+ 0*%%str
+ PSIGNW m0, [pw_m1] ; m0=out3[w], m3=t6[w]
+ VP9_RND_SH_SUMSUB_BA 4, 2, 6, 7, 5, [pd_8192] ; m9=out12[w], m2=t7[w]
+
+ UNSCRATCH 1, 8, tmpq+10*%%str
+ UNSCRATCH 5, 9, tmpq+14*%%str
+ UNSCRATCH 6, 12, tmpq+ 1*%%str
+ UNSCRATCH 7, 13, tmpq+ 4*%%str
+ SCRATCH 4, 9, tmpq+14*%%str
+
+ SUMSUB_BA w, 1, 6, 4 ; m13=out0[w], m1=t2[w]
+ SUMSUB_BA w, 5, 7, 4
+ PSIGNW m5, [pw_m1] ; m12=out15[w], m8=t3[w]
+
+ ; unfortunately, the code below overflows in some cases, e.g.
+ ; http://downloads.webmproject.org/test_data/libvpx/vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm
+%if 0 ; cpuflag(ssse3)
+ SUMSUB_BA w, 7, 6, 4
+ pmulhrsw m7, [pw_m11585x2] ; m8=out7[w]
+ pmulhrsw m6, [pw_11585x2] ; m1=out8[w]
+ SWAP 6, 7
+ SUMSUB_BA w, 3, 2, 4
+ pmulhrsw m3, [pw_11585x2] ; m3=out4[w]
+ pmulhrsw m2, [pw_11585x2] ; m2=out11[w]
+%else
+ SCRATCH 5, 8, tmpq+10*%%str
+ VP9_UNPACK_MULSUB_2W_4X 6, 7, 11585, m11585, [pd_8192], 5, 4
+ VP9_UNPACK_MULSUB_2W_4X 2, 3, 11585, 11585, [pd_8192], 5, 4
+ UNSCRATCH 5, 8, tmpq+10*%%str
+%endif
+
+ ; m13=out0, m0=out3, m3=out4, m8=out7, m1=out8, m2=out11, m9=out12, m12=out15
+ ; m11|r13=out1, m5=out2, m7=out5, r15=out6, r3=out9, r10=out10, r11=out13, r2=out14
+
+%if %2 == 1
+%if ARCH_X86_64
+ mova m13, [tmpq+ 6*%%str]
+ TRANSPOSE8x8W 1, 11, 14, 0, 3, 15, 13, 6, 10
+ mova [tmpq+ 0*16], m1
+ mova [tmpq+ 2*16], m11
+ mova [tmpq+ 4*16], m14
+ mova [tmpq+ 6*16], m0
+ mova m1, [tmpq+ 3*%%str]
+ mova m11, [tmpq+ 7*%%str]
+ mova m14, [tmpq+11*%%str]
+ mova m0, [tmpq+13*%%str]
+ mova [tmpq+ 8*16], m3
+ mova [tmpq+10*16], m15
+ mova [tmpq+12*16], m13
+ mova [tmpq+14*16], m6
+
+ TRANSPOSE8x8W 7, 1, 11, 2, 9, 14, 0, 5, 10
+ mova [tmpq+ 1*16], m7
+ mova [tmpq+ 3*16], m1
+ mova [tmpq+ 5*16], m11
+ mova [tmpq+ 7*16], m2
+ mova [tmpq+ 9*16], m9
+ mova [tmpq+11*16], m14
+ mova [tmpq+13*16], m0
+ mova [tmpq+15*16], m5
+%else
+ mova [tmpq+12*%%str], m2
+ mova [tmpq+ 1*%%str], m5
+ mova [tmpq+15*%%str], m7
+ mova m2, [tmpq+ 9*%%str]
+ mova m5, [tmpq+ 5*%%str]
+ mova m7, [tmpq+ 8*%%str]
+ TRANSPOSE8x8W 1, 2, 5, 0, 3, 7, 4, 6, [tmpq+ 6*%%str], [tmpq+ 8*%%str], 1
+ mova [tmpq+ 0*16], m1
+ mova [tmpq+ 2*16], m2
+ mova [tmpq+ 4*16], m5
+ mova [tmpq+ 6*16], m0
+ mova [tmpq+10*16], m7
+ mova m3, [tmpq+12*%%str]
+ mova [tmpq+12*16], m4
+ mova m4, [tmpq+14*%%str]
+ mova [tmpq+14*16], m6
+
+ mova m0, [tmpq+15*%%str]
+ mova m1, [tmpq+ 3*%%str]
+ mova m2, [tmpq+ 7*%%str]
+ mova m5, [tmpq+11*%%str]
+ mova m7, [tmpq+ 1*%%str]
+ TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [tmpq+13*%%str], [tmpq+ 9*%%str], 1
+ mova [tmpq+ 1*16], m0
+ mova [tmpq+ 3*16], m1
+ mova [tmpq+ 5*16], m2
+ mova [tmpq+ 7*16], m3
+ mova [tmpq+11*16], m5
+ mova [tmpq+13*16], m6
+ mova [tmpq+15*16], m7
+%endif
+%else
+ pxor m4, m4
+
+%if cpuflag(ssse3)
+%define ROUND_REG [pw_512]
+%else
+%define ROUND_REG [pw_32]
+%endif
+
+%if ARCH_X86_64
+ mova m12, [tmpq+ 6*%%str]
+ VP9_IDCT8_WRITEx2 1, 11, 10, 8, 4, ROUND_REG, 6
+ lea dstq, [dstq+strideq*2]
+ VP9_IDCT8_WRITEx2 14, 0, 10, 8, 4, ROUND_REG, 6
+ lea dstq, [dstq+strideq*2]
+ VP9_IDCT8_WRITEx2 3, 15, 10, 8, 4, ROUND_REG, 6
+ lea dstq, [dstq+strideq*2]
+ VP9_IDCT8_WRITEx2 12, 6, 10, 8, 4, ROUND_REG, 6
+ lea dstq, [dstq+strideq*2]
+
+ mova m1, [tmpq+ 3*%%str]
+ mova m11, [tmpq+ 7*%%str]
+ mova m14, [tmpq+11*%%str]
+ mova m0, [tmpq+13*%%str]
+
+ VP9_IDCT8_WRITEx2 7, 1, 10, 8, 4, ROUND_REG, 6
+ lea dstq, [dstq+strideq*2]
+ VP9_IDCT8_WRITEx2 11, 2, 10, 8, 4, ROUND_REG, 6
+ lea dstq, [dstq+strideq*2]
+ VP9_IDCT8_WRITEx2 9, 14, 10, 8, 4, ROUND_REG, 6
+ lea dstq, [dstq+strideq*2]
+ VP9_IDCT8_WRITEx2 0, 5, 10, 8, 4, ROUND_REG, 6
+%else
+ mova [tmpq+ 0*%%str], m2
+ mova [tmpq+ 1*%%str], m5
+ mova [tmpq+ 2*%%str], m7
+ mova m2, [tmpq+ 9*%%str]
+ VP9_IDCT8_WRITEx2 1, 2, 5, 7, 4, ROUND_REG, 6
+ lea dstq, [dstq+strideq*2]
+ mova m5, [tmpq+ 5*%%str]
+ VP9_IDCT8_WRITEx2 5, 0, 1, 2, 4, ROUND_REG, 6
+ lea dstq, [dstq+strideq*2]
+ mova m5, [tmpq+ 8*%%str]
+ VP9_IDCT8_WRITEx2 3, 5, 1, 2, 4, ROUND_REG, 6
+ lea dstq, [dstq+strideq*2]
+ mova m5, [tmpq+ 6*%%str]
+ VP9_IDCT8_WRITEx2 5, 6, 1, 2, 4, ROUND_REG, 6
+ lea dstq, [dstq+strideq*2]
+
+ mova m0, [tmpq+ 2*%%str]
+ mova m3, [tmpq+ 3*%%str]
+ VP9_IDCT8_WRITEx2 0, 3, 1, 2, 4, ROUND_REG, 6
+ lea dstq, [dstq+strideq*2]
+ mova m0, [tmpq+ 7*%%str]
+ mova m3, [tmpq+ 0*%%str]
+ VP9_IDCT8_WRITEx2 0, 3, 1, 2, 4, ROUND_REG, 6
+ lea dstq, [dstq+strideq*2]
+ mova m0, [tmpq+14*%%str]
+ mova m3, [tmpq+11*%%str]
+ VP9_IDCT8_WRITEx2 0, 3, 1, 2, 4, ROUND_REG, 6
+ lea dstq, [dstq+strideq*2]
+ mova m0, [tmpq+13*%%str]
+ mova m3, [tmpq+ 1*%%str]
+ VP9_IDCT8_WRITEx2 0, 3, 1, 2, 4, ROUND_REG, 6
+%endif
+
+ SWAP 0, 4 ; zero
+%undef ROUND_REG
+%endif
+%endmacro
+
+%macro IADST16_FN 5
+INIT_XMM %5
+cglobal vp9_%1_%3_16x16_add, 3, 6, 16, 512, dst, stride, block, cnt, dst_bak, tmp
+ mov cntd, 2
+ mov tmpq, rsp
+.loop1_full:
+ VP9_%2_1D blockq, 1
+ add blockq, 16
+ add tmpq, 256
+ dec cntd
+ jg .loop1_full
+ sub blockq, 32
+
+ mov cntd, 2
+ mov tmpq, rsp
+ mov dst_bakq, dstq
+.loop2_full:
+ VP9_%4_1D tmpq, 2
+ lea dstq, [dst_bakq+8]
+ add tmpq, 16
+ dec cntd
+ jg .loop2_full
+
+ ; at the end of the loop, m0 should still be zero
+ ; use that to zero out block coefficients
+ ZERO_BLOCK blockq, 32, 16, m0
+ RET
+%endmacro
+
+IADST16_FN idct, IDCT16, iadst, IADST16, sse2
+IADST16_FN iadst, IADST16, idct, IDCT16, sse2
+IADST16_FN iadst, IADST16, iadst, IADST16, sse2
+IADST16_FN idct, IDCT16, iadst, IADST16, ssse3
+IADST16_FN iadst, IADST16, idct, IDCT16, ssse3
+IADST16_FN iadst, IADST16, iadst, IADST16, ssse3
+IADST16_FN idct, IDCT16, iadst, IADST16, avx
+IADST16_FN iadst, IADST16, idct, IDCT16, avx
+IADST16_FN iadst, IADST16, iadst, IADST16, avx
+
+;---------------------------------------------------------------------------------------------
+; void vp9_idct_idct_32x32_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+;---------------------------------------------------------------------------------------------
+
+%macro VP9_IDCT32_1D 2-3 32 ; src, pass, nnzc
+%assign %%str 16*%2*%2
+ ; first do t0-15, this can be done identical to idct16x16
+ VP9_IDCT16_1D_START %1, %3/2, 64*2, tmpq, 2*%%str, 1
+
+ ; store everything on stack to make space available for t16-31
+ ; we store interleaved with the output of the second half (t16-31)
+ ; so we don't need to allocate extra stack space
+ mova [tmpq+ 0*%%str], m0 ; t0
+ mova [tmpq+ 4*%%str], m1 ; t1
+ mova [tmpq+ 8*%%str], m2 ; t2
+ mova [tmpq+12*%%str], m3 ; t3
+ mova [tmpq+16*%%str], m4 ; t4
+ mova [tmpq+20*%%str], m5 ; t5
+%if ARCH_X86_64
+ mova [tmpq+22*%%str], m10 ; t10
+ mova [tmpq+18*%%str], m11 ; t11
+ mova [tmpq+14*%%str], m12 ; t12
+ mova [tmpq+10*%%str], m13 ; t13
+ mova [tmpq+ 6*%%str], m14 ; t14
+ mova [tmpq+ 2*%%str], m15 ; t15
+%endif
+
+ mova m0, [tmpq+ 30*%%str]
+ UNSCRATCH 1, 6, tmpq+26*%%str
+ UNSCRATCH 2, 8, tmpq+24*%%str
+ UNSCRATCH 3, 9, tmpq+28*%%str
+ SUMSUB_BA w, 1, 3, 4 ; t6, t9
+ SUMSUB_BA w, 0, 2, 4 ; t7, t8
+
+ mova [tmpq+24*%%str], m1 ; t6
+ mova [tmpq+28*%%str], m0 ; t7
+ mova [tmpq+30*%%str], m2 ; t8
+ mova [tmpq+26*%%str], m3 ; t9
+
+ ; then, secondly, do t16-31
+%if %3 <= 8
+ mova m4, [%1+ 1*64]
+ mova m7, [%1+ 7*64]
+
+ pmulhrsw m1, m4, [pw_16364x2] ;t31
+ pmulhrsw m4, [pw_804x2] ;t16
+
+ VP9_UNPACK_MULSUB_2W_4X 5, 0, 1, 4, 16069, 3196, [pd_8192], 6, 2 ; t17, t30
+
+ pmulhrsw m3, m7, [pw_m5520x2] ;t19
+ pmulhrsw m7, [pw_15426x2] ;t28
+
+ SCRATCH 4, 13, tmpq+ 1*%%str
+ SCRATCH 5, 12, tmpq+15*%%str
+
+ VP9_UNPACK_MULSUB_2W_4X 2, 6, 7, 3, 3196, m16069, [pd_8192], 4, 5 ; t18, t29
+%else
+ mova m0, [%1+ 1*64]
+ mova m1, [%1+15*64]
+%if %3 <= 16
+ pmulhrsw m5, m0, [pw_16364x2]
+ pmulhrsw m0, [pw_804x2]
+ pmulhrsw m4, m1, [pw_m11003x2]
+ pmulhrsw m1, [pw_12140x2]
+%else
+ mova m4, [%1+17*64]
+ mova m5, [%1+31*64]
+
+ VP9_UNPACK_MULSUB_2W_4X 0, 5, 16364, 804, [pd_8192], 2, 3 ; t16, t31
+ VP9_UNPACK_MULSUB_2W_4X 4, 1, 11003, 12140, [pd_8192], 2, 3 ; t17, t30
+%endif
+ SUMSUB_BA w, 4, 0, 2
+ SUMSUB_BA w, 1, 5, 2
+
+ VP9_UNPACK_MULSUB_2W_4X 5, 0, 16069, 3196, [pd_8192], 2, 3 ; t17, t30
+
+ SCRATCH 4, 13, tmpq+ 1*%%str
+ SCRATCH 5, 12, tmpq+15*%%str
+
+ mova m2, [%1+ 7*64]
+ mova m3, [%1+ 9*64]
+%if %3 <= 16
+ pmulhrsw m7, m3, [pw_14811x2]
+ pmulhrsw m3, [pw_7005x2]
+ pmulhrsw m6, m2, [pw_m5520x2]
+ pmulhrsw m2, [pw_15426x2]
+%else
+ mova m7, [%1+23*64]
+ mova m6, [%1+25*64]
+
+ VP9_UNPACK_MULSUB_2W_4X 3, 7, 14811, 7005, [pd_8192], 4, 5 ; t18, t29
+ VP9_UNPACK_MULSUB_2W_4X 6, 2, 5520, 15426, [pd_8192], 4, 5 ; t19, t28
+%endif
+ SUMSUB_BA w, 3, 6, 4
+ SUMSUB_BA w, 7, 2, 4
+
+ VP9_UNPACK_MULSUB_2W_4X 2, 6, 3196, m16069, [pd_8192], 4, 5 ; t18, t29
+%endif
+
+ UNSCRATCH 5, 12, tmpq+15*%%str
+ SUMSUB_BA w, 6, 0, 4
+ mova [tmpq+25*%%str], m6 ; t19
+ UNSCRATCH 4, 13, tmpq+ 1*%%str
+ SUMSUB_BA w, 7, 1, 6
+ SUMSUB_BA w, 3, 4, 6
+ mova [tmpq+23*%%str], m3 ; t16
+ SUMSUB_BA w, 2, 5, 6
+
+ VP9_UNPACK_MULSUB_2W_4X 0, 5, 15137, 6270, [pd_8192], 6, 3 ; t18, t29
+ VP9_UNPACK_MULSUB_2W_4X 1, 4, 15137, 6270, [pd_8192], 6, 3 ; t19, t28
+
+ SCRATCH 0, 10, tmpq+ 1*%%str
+ SCRATCH 1, 11, tmpq+ 7*%%str
+ SCRATCH 2, 9, tmpq+ 9*%%str
+ SCRATCH 4, 14, tmpq+15*%%str
+ SCRATCH 5, 15, tmpq+17*%%str
+ SCRATCH 7, 13, tmpq+31*%%str
+
+%if %3 <= 8
+ mova m0, [%1+ 5*64]
+ mova m3, [%1+ 3*64]
+
+ pmulhrsw m5, m0, [pw_15893x2] ;t27
+ pmulhrsw m0, [pw_3981x2] ;t20
+
+ VP9_UNPACK_MULSUB_2W_4X 1, 4, 5, 0, 9102, 13623, [pd_8192], 7, 2 ; t21, t26
+
+ pmulhrsw m6, m3, [pw_m2404x2] ;t23
+ pmulhrsw m3, [pw_16207x2] ;t24
+
+ SCRATCH 5, 8, tmpq+ 5*%%str
+ SCRATCH 4, 12, tmpq+11*%%str
+
+ VP9_UNPACK_MULSUB_2W_4X 7, 2, 3, 6, 13623, m9102, [pd_8192], 4, 5 ; t22, t25
+%else
+ mova m4, [%1+ 5*64]
+ mova m5, [%1+11*64]
+%if %3 <= 16
+ pmulhrsw m1, m4, [pw_15893x2]
+ pmulhrsw m4, [pw_3981x2]
+ pmulhrsw m0, m5, [pw_m8423x2]
+ pmulhrsw m5, [pw_14053x2]
+%else
+ mova m0, [%1+21*64]
+ mova m1, [%1+27*64]
+
+ VP9_UNPACK_MULSUB_2W_4X 4, 1, 15893, 3981, [pd_8192], 2, 3 ; t20, t27
+ VP9_UNPACK_MULSUB_2W_4X 0, 5, 8423, 14053, [pd_8192], 2, 3 ; t21, t26
+%endif
+ SUMSUB_BA w, 0, 4, 2
+ SUMSUB_BA w, 5, 1, 2
+
+ VP9_UNPACK_MULSUB_2W_4X 1, 4, 9102, 13623, [pd_8192], 2, 3 ; t21, t26
+
+ SCRATCH 5, 8, tmpq+ 5*%%str
+ SCRATCH 4, 12, tmpq+11*%%str
+
+ mova m7, [%1+ 3*64]
+ mova m6, [%1+13*64]
+%if %3 <= 16
+ pmulhrsw m3, m6, [pw_13160x2]
+ pmulhrsw m6, [pw_9760x2]
+ pmulhrsw m2, m7, [pw_m2404x2]
+ pmulhrsw m7, [pw_16207x2]
+%else
+ mova m2, [%1+29*64]
+ mova m3, [%1+19*64]
+ VP9_UNPACK_MULSUB_2W_4X 6, 3, 13160, 9760, [pd_8192], 4, 5 ; t22, t25
+ VP9_UNPACK_MULSUB_2W_4X 2, 7, 2404, 16207, [pd_8192], 4, 5 ; t23, t24
+%endif
+ SUMSUB_BA w, 6, 2, 4
+ SUMSUB_BA w, 3, 7, 4
+
+ VP9_UNPACK_MULSUB_2W_4X 7, 2, 13623, m9102, [pd_8192], 4, 5 ; t22, t25
+%endif
+
+ ; m4=t16, m5=t17, m9=t18, m8=t19, m0=t20, m1=t21, m13=t22, m12=t23,
+ ; m3=t24, m2=t25, m14=t26, m15=t27, m7=t28, m6=t29, m10=t30, m11=t31
+
+ UNSCRATCH 4, 12, tmpq+11*%%str
+ SUMSUB_BA w, 0, 6, 5
+ SUMSUB_BA w, 4, 2, 5
+ UNSCRATCH 5, 8, tmpq+ 5*%%str
+ SCRATCH 4, 8, tmpq+11*%%str
+ SUMSUB_BA w, 1, 7, 4
+ SUMSUB_BA w, 5, 3, 4
+ SCRATCH 5, 12, tmpq+ 5*%%str
+
+ VP9_UNPACK_MULSUB_2W_4X 3, 6, 6270, m15137, [pd_8192], 4, 5 ; t20, t27
+ VP9_UNPACK_MULSUB_2W_4X 2, 7, 6270, m15137, [pd_8192], 4, 5 ; t21, t26
+
+ ; m8[s]=t16, m9=t17, m5=t18, m4[s]=t19, m12=t20, m13=t21, m1=t22, m0=t23,
+ ; m15=t24, m14=t25, m2=t26, m3=t27, m11=t28, m10=t29, m6=t30, m7=t31
+
+ UNSCRATCH 5, 9, tmpq+ 9*%%str
+ mova m4, [tmpq+23*%%str] ; t16
+%if ARCH_X86_64
+ SUMSUB_BA w, 1, 5, 9
+ SUMSUB_BA w, 0, 4, 9
+%else
+ SUMSUB_BADC w, 1, 5, 0, 4
+%endif
+ mova [tmpq+29*%%str], m1 ; t17
+ mova [tmpq+21*%%str], m0 ; t16
+ UNSCRATCH 0, 10, tmpq+ 1*%%str
+ UNSCRATCH 1, 11, tmpq+ 7*%%str
+%if ARCH_X86_64
+ SUMSUB_BA w, 2, 0, 9
+ SUMSUB_BA w, 3, 1, 9
+%else
+ SUMSUB_BADC w, 2, 0, 3, 1
+%endif
+ mova [tmpq+ 9*%%str], m2 ; t18
+ mova [tmpq+13*%%str], m3 ; t19
+ SCRATCH 0, 10, tmpq+23*%%str
+ SCRATCH 1, 11, tmpq+27*%%str
+
+ UNSCRATCH 2, 14, tmpq+15*%%str
+ UNSCRATCH 3, 15, tmpq+17*%%str
+ SUMSUB_BA w, 6, 2, 0
+ SUMSUB_BA w, 7, 3, 0
+ SCRATCH 6, 14, tmpq+ 3*%%str
+ SCRATCH 7, 15, tmpq+ 7*%%str
+
+ UNSCRATCH 0, 8, tmpq+11*%%str
+ mova m1, [tmpq+25*%%str] ; t19
+ UNSCRATCH 6, 12, tmpq+ 5*%%str
+ UNSCRATCH 7, 13, tmpq+31*%%str
+%if ARCH_X86_64
+ SUMSUB_BA w, 0, 1, 9
+ SUMSUB_BA w, 6, 7, 9
+%else
+ SUMSUB_BADC w, 0, 1, 6, 7
+%endif
+
+ ; m0=t16, m1=t17, m2=t18, m3=t19, m11=t20, m10=t21, m9=t22, m8=t23,
+ ; m7=t24, m6=t25, m5=t26, m4=t27, m12=t28, m13=t29, m14=t30, m15=t31
+
+%if 0; cpuflag(ssse3)
+%if ARCH_X86_64
+ SUMSUB_BA w, 4, 7, 8
+ SUMSUB_BA w, 5, 1, 8
+%else
+ SUMSUB_BADC w, 4, 7, 5, 1
+%endif
+
+ pmulhrsw m7, [pw_11585x2]
+ pmulhrsw m4, [pw_11585x2]
+ pmulhrsw m1, [pw_11585x2]
+ pmulhrsw m5, [pw_11585x2]
+
+ mova [tmpq+ 5*%%str], m7 ; t23
+ SCRATCH 1, 13, tmpq+25*%%str
+ UNSCRATCH 7, 10, tmpq+23*%%str
+ UNSCRATCH 1, 11, tmpq+27*%%str
+
+%if ARCH_X86_64
+ SUMSUB_BA w, 7, 3, 10
+ SUMSUB_BA w, 1, 2, 10
+%else
+ SUMSUB_BADC w, 7, 3, 1, 2
+%endif
+
+ pmulhrsw m3, [pw_11585x2]
+ pmulhrsw m7, [pw_11585x2]
+ pmulhrsw m2, [pw_11585x2]
+ pmulhrsw m1, [pw_11585x2]
+%else
+ SCRATCH 0, 8, tmpq+15*%%str
+ SCRATCH 6, 9, tmpq+17*%%str
+ VP9_UNPACK_MULSUB_2W_4X 7, 4, 11585, 11585, [pd_8192], 0, 6
+ mova [tmpq+ 5*%%str], m7 ; t23
+ UNSCRATCH 7, 10, tmpq+23*%%str
+ VP9_UNPACK_MULSUB_2W_4X 1, 5, 11585, 11585, [pd_8192], 0, 6
+ SCRATCH 1, 13, tmpq+25*%%str
+ UNSCRATCH 1, 11, tmpq+27*%%str
+ VP9_UNPACK_MULSUB_2W_4X 3, 7, 11585, 11585, [pd_8192], 0, 6
+ VP9_UNPACK_MULSUB_2W_4X 2, 1, 11585, 11585, [pd_8192], 0, 6
+ UNSCRATCH 0, 8, tmpq+15*%%str
+ UNSCRATCH 6, 9, tmpq+17*%%str
+%endif
+
+ ; m0=t16, m1=t17, m2=t18, m3=t19, m4=t20, m5=t21, m6=t22, m7=t23,
+ ; m8=t24, m9=t25, m10=t26, m11=t27, m12=t28, m13=t29, m14=t30, m15=t31
+
+ ; then do final pass to sumsub+store the two halves
+%if %2 == 1
+ mova [tmpq+17*%%str], m2 ; t20
+ mova [tmpq+ 1*%%str], m3 ; t21
+%if ARCH_X86_64
+ mova [tmpq+25*%%str], m13 ; t22
+
+ mova m8, [tmpq+ 0*%%str] ; t0
+ mova m9, [tmpq+ 4*%%str] ; t1
+ mova m12, [tmpq+ 8*%%str] ; t2
+ mova m11, [tmpq+12*%%str] ; t3
+ mova m2, [tmpq+16*%%str] ; t4
+ mova m3, [tmpq+20*%%str] ; t5
+ mova m13, [tmpq+24*%%str] ; t6
+
+ SUMSUB_BA w, 6, 8, 10
+ mova [tmpq+ 3*%%str], m8 ; t15
+ mova m10, [tmpq+28*%%str] ; t7
+ SUMSUB_BA w, 0, 9, 8
+ SUMSUB_BA w, 15, 12, 8
+ SUMSUB_BA w, 14, 11, 8
+ SUMSUB_BA w, 1, 2, 8
+ SUMSUB_BA w, 7, 3, 8
+ SUMSUB_BA w, 5, 13, 8
+ SUMSUB_BA w, 4, 10, 8
+
+ TRANSPOSE8x8W 6, 0, 15, 14, 1, 7, 5, 4, 8
+ mova [tmpq+ 0*%%str], m6
+ mova [tmpq+ 4*%%str], m0
+ mova [tmpq+ 8*%%str], m15
+ mova [tmpq+12*%%str], m14
+ mova [tmpq+16*%%str], m1
+ mova [tmpq+20*%%str], m7
+ mova [tmpq+24*%%str], m5
+ mova [tmpq+28*%%str], m4
+
+ mova m8, [tmpq+ 3*%%str] ; t15
+ TRANSPOSE8x8W 10, 13, 3, 2, 11, 12, 9, 8, 0
+ mova [tmpq+ 3*%%str], m10
+ mova [tmpq+ 7*%%str], m13
+ mova [tmpq+11*%%str], m3
+ mova [tmpq+15*%%str], m2
+ mova [tmpq+19*%%str], m11
+ mova [tmpq+23*%%str], m12
+ mova [tmpq+27*%%str], m9
+ mova [tmpq+31*%%str], m8
+
+ mova m15, [tmpq+30*%%str] ; t8
+ mova m14, [tmpq+26*%%str] ; t9
+ mova m13, [tmpq+22*%%str] ; t10
+ mova m12, [tmpq+18*%%str] ; t11
+ mova m11, [tmpq+14*%%str] ; t12
+ mova m10, [tmpq+10*%%str] ; t13
+ mova m9, [tmpq+ 6*%%str] ; t14
+ mova m8, [tmpq+ 2*%%str] ; t15
+ mova m7, [tmpq+21*%%str] ; t16
+ mova m6, [tmpq+29*%%str] ; t17
+ mova m5, [tmpq+ 9*%%str] ; t18
+ mova m4, [tmpq+13*%%str] ; t19
+ mova m3, [tmpq+17*%%str] ; t20
+ mova m2, [tmpq+ 1*%%str] ; t21
+ mova m1, [tmpq+25*%%str] ; t22
+
+ SUMSUB_BA w, 7, 8, 0
+ mova [tmpq+ 2*%%str], m8
+ mova m0, [tmpq+ 5*%%str] ; t23
+ SUMSUB_BA w, 6, 9, 8
+ SUMSUB_BA w, 5, 10, 8
+ SUMSUB_BA w, 4, 11, 8
+ SUMSUB_BA w, 3, 12, 8
+ SUMSUB_BA w, 2, 13, 8
+ SUMSUB_BA w, 1, 14, 8
+ SUMSUB_BA w, 0, 15, 8
+
+ TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, 8
+ mova [tmpq+ 1*%%str], m0
+ mova [tmpq+ 5*%%str], m1
+ mova [tmpq+ 9*%%str], m2
+ mova [tmpq+13*%%str], m3
+ mova [tmpq+17*%%str], m4
+ mova [tmpq+21*%%str], m5
+ mova [tmpq+25*%%str], m6
+ mova [tmpq+29*%%str], m7
+
+ mova m8, [tmpq+ 2*%%str]
+ TRANSPOSE8x8W 8, 9, 10, 11, 12, 13, 14, 15, 0
+ mova [tmpq+ 2*%%str], m8
+ mova [tmpq+ 6*%%str], m9
+ mova [tmpq+10*%%str], m10
+ mova [tmpq+14*%%str], m11
+ mova [tmpq+18*%%str], m12
+ mova [tmpq+22*%%str], m13
+ mova [tmpq+26*%%str], m14
+ mova [tmpq+30*%%str], m15
+%else
+ mova m2, [tmpq+24*%%str] ; t6
+ mova m3, [tmpq+28*%%str] ; t7
+ SUMSUB_BADC w, 5, 2, 4, 3
+ mova [tmpq+24*%%str], m5
+ mova [tmpq+23*%%str], m2
+ mova [tmpq+28*%%str], m4
+ mova [tmpq+19*%%str], m3
+
+ mova m2, [tmpq+16*%%str] ; t4
+ mova m3, [tmpq+20*%%str] ; t5
+ SUMSUB_BA w, 1, 2, 5
+ SUMSUB_BA w, 7, 3, 5
+ mova [tmpq+15*%%str], m2
+ mova [tmpq+11*%%str], m3
+
+ mova m2, [tmpq+ 0*%%str] ; t0
+ mova m3, [tmpq+ 4*%%str] ; t1
+ SUMSUB_BA w, 6, 2, 5
+ SUMSUB_BA w, 0, 3, 5
+ mova [tmpq+31*%%str], m2
+ mova [tmpq+27*%%str], m3
+
+ mova m2, [tmpq+ 8*%%str] ; t2
+ mova m3, [tmpq+12*%%str] ; t3
+ mova m5, [tmpq+ 7*%%str]
+ mova m4, [tmpq+ 3*%%str]
+ SUMSUB_BADC w, 5, 2, 4, 3
+ mova [tmpq+ 7*%%str], m2
+ mova [tmpq+ 3*%%str], m3
+
+ mova m3, [tmpq+28*%%str]
+ TRANSPOSE8x8W 6, 0, 5, 4, 1, 7, 2, 3, [tmpq+24*%%str], [tmpq+16*%%str], 1
+ mova [tmpq+ 0*%%str], m6
+ mova [tmpq+ 4*%%str], m0
+ mova [tmpq+ 8*%%str], m5
+ mova [tmpq+12*%%str], m4
+ mova [tmpq+20*%%str], m7
+ mova [tmpq+24*%%str], m2
+ mova [tmpq+28*%%str], m3
+
+ mova m6, [tmpq+19*%%str]
+ mova m0, [tmpq+23*%%str]
+ mova m5, [tmpq+11*%%str]
+ mova m4, [tmpq+15*%%str]
+ mova m1, [tmpq+ 3*%%str]
+ mova m7, [tmpq+ 7*%%str]
+ mova m3, [tmpq+31*%%str]
+ TRANSPOSE8x8W 6, 0, 5, 4, 1, 7, 2, 3, [tmpq+27*%%str], [tmpq+19*%%str], 1
+ mova [tmpq+ 3*%%str], m6
+ mova [tmpq+ 7*%%str], m0
+ mova [tmpq+11*%%str], m5
+ mova [tmpq+15*%%str], m4
+ mova [tmpq+23*%%str], m7
+ mova [tmpq+27*%%str], m2
+ mova [tmpq+31*%%str], m3
+
+ mova m1, [tmpq+ 6*%%str] ; t14
+ mova m0, [tmpq+ 2*%%str] ; t15
+ mova m7, [tmpq+21*%%str] ; t16
+ mova m6, [tmpq+29*%%str] ; t17
+ SUMSUB_BA w, 7, 0, 2
+ SUMSUB_BA w, 6, 1, 2
+ mova [tmpq+29*%%str], m7
+ mova [tmpq+ 2*%%str], m0
+ mova [tmpq+21*%%str], m6
+ mova [tmpq+ 6*%%str], m1
+
+ mova m1, [tmpq+14*%%str] ; t12
+ mova m0, [tmpq+10*%%str] ; t13
+ mova m5, [tmpq+ 9*%%str] ; t18
+ mova m4, [tmpq+13*%%str] ; t19
+ SUMSUB_BA w, 5, 0, 2
+ SUMSUB_BA w, 4, 1, 2
+ mova [tmpq+10*%%str], m0
+ mova [tmpq+14*%%str], m1
+
+ mova m1, [tmpq+22*%%str] ; t10
+ mova m0, [tmpq+18*%%str] ; t11
+ mova m3, [tmpq+17*%%str] ; t20
+ mova m2, [tmpq+ 1*%%str] ; t21
+ SUMSUB_BA w, 3, 0, 6
+ SUMSUB_BA w, 2, 1, 6
+ mova [tmpq+18*%%str], m0
+ mova [tmpq+22*%%str], m1
+
+ mova m7, [tmpq+30*%%str] ; t8
+ mova m6, [tmpq+26*%%str] ; t9
+ mova m1, [tmpq+25*%%str] ; t22
+ mova m0, [tmpq+ 5*%%str] ; t23
+ SUMSUB_BADC w, 1, 6, 0, 7
+ mova [tmpq+26*%%str], m6
+ mova [tmpq+30*%%str], m7
+
+ mova m7, [tmpq+29*%%str]
+ TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [tmpq+21*%%str], [tmpq+17*%%str], 1
+ mova [tmpq+ 1*%%str], m0
+ mova [tmpq+ 5*%%str], m1
+ mova [tmpq+ 9*%%str], m2
+ mova [tmpq+13*%%str], m3
+ mova [tmpq+21*%%str], m5
+ mova [tmpq+25*%%str], m6
+ mova [tmpq+29*%%str], m7
+
+ mova m0, [tmpq+ 2*%%str]
+ mova m1, [tmpq+ 6*%%str]
+ mova m2, [tmpq+10*%%str]
+ mova m3, [tmpq+14*%%str]
+ mova m4, [tmpq+18*%%str]
+ mova m5, [tmpq+22*%%str]
+ mova m7, [tmpq+30*%%str]
+ TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [tmpq+26*%%str], [tmpq+18*%%str], 1
+ mova [tmpq+ 2*%%str], m0
+ mova [tmpq+ 6*%%str], m1
+ mova [tmpq+10*%%str], m2
+ mova [tmpq+14*%%str], m3
+ mova [tmpq+22*%%str], m5
+ mova [tmpq+26*%%str], m6
+ mova [tmpq+30*%%str], m7
+%endif
+%else
+ ; t0-7 is in [tmpq+{0,4,8,12,16,20,24,28}*%%str]
+ ; t8-15 is in [tmpq+{2,6,10,14,18,22,26,30}*%%str]
+ ; t16-19 and t23 is in [tmpq+{1,5,9,13,29}*%%str]
+ ; t20-22 is in m4-6
+ ; t24-31 is in m8-15
+
+%if cpuflag(ssse3)
+%define ROUND_REG [pw_512]
+%else
+%define ROUND_REG [pw_32]
+%endif
+
+%macro %%STORE_2X2 7-8 1 ; src[1-4], tmp[1-2], zero, inc_dst_ptrs
+ SUMSUB_BA w, %4, %1, %5
+ SUMSUB_BA w, %3, %2, %5
+ VP9_IDCT8_WRITEx2 %4, %3, %5, %6, %7, ROUND_REG, 6
+%if %8 == 1
+ add dstq, stride2q
+%endif
+ VP9_IDCT8_WRITEx2 %2, %1, %5, %6, %7, ROUND_REG, 6, dst_endq
+%if %8 == 1
+ sub dst_endq, stride2q
+%endif
+%endmacro
+
+%if ARCH_X86_64
+ pxor m10, m10
+
+ ; store t0-1 and t30-31
+ mova m8, [tmpq+ 0*%%str]
+ mova m9, [tmpq+ 4*%%str]
+ %%STORE_2X2 8, 9, 0, 6, 12, 11, 10
+
+ ; store t2-3 and t28-29
+ mova m8, [tmpq+ 8*%%str]
+ mova m9, [tmpq+12*%%str]
+ %%STORE_2X2 8, 9, 14, 15, 12, 11, 10
+
+ ; store t4-5 and t26-27
+ mova m8, [tmpq+16*%%str]
+ mova m9, [tmpq+20*%%str]
+ %%STORE_2X2 8, 9, 7, 1, 12, 11, 10
+
+ ; store t6-7 and t24-25
+ mova m8, [tmpq+24*%%str]
+ mova m9, [tmpq+28*%%str]
+ %%STORE_2X2 8, 9, 4, 5, 12, 11, 10
+
+ ; store t8-9 and t22-23
+ mova m8, [tmpq+30*%%str]
+ mova m9, [tmpq+26*%%str]
+ mova m0, [tmpq+ 5*%%str]
+ %%STORE_2X2 8, 9, 13, 0, 12, 11, 10
+
+ ; store t10-11 and t20-21
+ mova m8, [tmpq+22*%%str]
+ mova m9, [tmpq+18*%%str]
+ %%STORE_2X2 8, 9, 2, 3, 12, 11, 10
+
+ ; store t12-13 and t18-19
+ mova m8, [tmpq+14*%%str]
+ mova m9, [tmpq+10*%%str]
+ mova m5, [tmpq+13*%%str]
+ mova m4, [tmpq+ 9*%%str]
+ %%STORE_2X2 8, 9, 4, 5, 12, 11, 10
+
+ ; store t14-17
+ mova m8, [tmpq+ 6*%%str]
+ mova m9, [tmpq+ 2*%%str]
+ mova m5, [tmpq+29*%%str]
+ mova m4, [tmpq+21*%%str]
+ %%STORE_2X2 8, 9, 4, 5, 12, 11, 10, 0
+
+ SWAP 1, 10 ; zero
+%else
+ mova [tmpq+ 1*%%str], m1
+ mova [tmpq+11*%%str], m2
+ mova [tmpq+15*%%str], m3
+ mova [tmpq+17*%%str], m4
+ mova [tmpq+19*%%str], m5
+ pxor m1, m1
+
+ ; store t0-1 and t30-31
+ mova m2, [tmpq+ 0*%%str]
+ mova m3, [tmpq+ 4*%%str]
+ %%STORE_2X2 2, 3, 0, 6, 4, 5, 1
+
+ ; store t2-3 and t28-29
+ mova m2, [tmpq+ 8*%%str]
+ mova m3, [tmpq+12*%%str]
+ mova m0, [tmpq+ 3*%%str]
+ mova m6, [tmpq+ 7*%%str]
+ %%STORE_2X2 2, 3, 0, 6, 4, 5, 1
+
+ ; store t4-5 and t26-27
+ mova m2, [tmpq+16*%%str]
+ mova m3, [tmpq+20*%%str]
+ mova m0, [tmpq+ 1*%%str]
+ %%STORE_2X2 2, 3, 7, 0, 4, 5, 1
+
+ ; store t6-7 and t24-25
+ mova m2, [tmpq+24*%%str]
+ mova m3, [tmpq+28*%%str]
+ mova m0, [tmpq+17*%%str]
+ mova m6, [tmpq+19*%%str]
+ %%STORE_2X2 2, 3, 0, 6, 4, 5, 1
+
+ ; store t8-9 and t22-23
+ mova m2, [tmpq+30*%%str]
+ mova m3, [tmpq+26*%%str]
+ mova m0, [tmpq+25*%%str]
+ mova m6, [tmpq+ 5*%%str]
+ %%STORE_2X2 2, 3, 0, 6, 4, 5, 1
+
+ ; store t10-11 and t20-21
+ mova m2, [tmpq+22*%%str]
+ mova m3, [tmpq+18*%%str]
+ mova m0, [tmpq+11*%%str]
+ mova m6, [tmpq+15*%%str]
+ %%STORE_2X2 2, 3, 0, 6, 4, 5, 1
+
+ ; store t12-13 and t18-19
+ mova m2, [tmpq+14*%%str]
+ mova m3, [tmpq+10*%%str]
+ mova m6, [tmpq+13*%%str]
+ mova m0, [tmpq+ 9*%%str]
+ %%STORE_2X2 2, 3, 0, 6, 4, 5, 1
+
+ ; store t14-17
+ mova m2, [tmpq+ 6*%%str]
+ mova m3, [tmpq+ 2*%%str]
+ mova m6, [tmpq+29*%%str]
+ mova m0, [tmpq+21*%%str]
+ %%STORE_2X2 2, 3, 0, 6, 4, 5, 1, 0
+%endif
+%undef ROUND_REG
+%endif
+%endmacro
+
+%macro VP9_IDCT_IDCT_32x32_ADD_XMM 1
+INIT_XMM %1
+cglobal vp9_idct_idct_32x32_add, 0, 6 + ARCH_X86_64 * 3, 16, 2048, dst, stride, block, eob
+ movifnidn eobd, dword eobm
+%if cpuflag(ssse3)
+ cmp eobd, 135
+ jg .idctfull
+ cmp eobd, 34
+ jg .idct16x16
+ cmp eobd, 1
+ jg .idct8x8
+%else
+ cmp eobd, 1
+ jg .idctfull
+%endif
+
+ ; dc-only case
+ movifnidn blockq, blockmp
+ movifnidn dstq, dstmp
+ movifnidn strideq, stridemp
+%if cpuflag(ssse3)
+ movd m0, [blockq]
+ mova m1, [pw_11585x2]
+ pmulhrsw m0, m1
+ pmulhrsw m0, m1
+%else
+ DEFINE_ARGS dst, stride, block, coef
+ movsx coefd, word [blockq]
+ imul coefd, 11585
+ add coefd, 8192
+ sar coefd, 14
+ imul coefd, 11585
+ add coefd, (32 << 14) + 8192
+ sar coefd, 14 + 6
+ movd m0, coefd
+%endif
+ SPLATW m0, m0, q0000
+%if cpuflag(ssse3)
+ pmulhrsw m0, [pw_512]
+%endif
+ pxor m5, m5
+ movd [blockq], m5
+%rep 31
+ VP9_STORE_2XFULL 0, 1, 2, 3, 4, 5, mmsize
+ add dstq, strideq
+%endrep
+ VP9_STORE_2XFULL 0, 1, 2, 3, 4, 5, mmsize
+ RET
+
+%if ARCH_X86_64
+ DEFINE_ARGS dst_bak, stride, block, cnt, dst, stride30, dst_end, stride2, tmp
+%else
+%define dst_bakq r0mp
+%endif
+%if cpuflag(ssse3)
+.idct8x8:
+%if ARCH_X86_32
+ DEFINE_ARGS block, u1, u2, u3, u4, tmp
+ mov blockq, r2mp
+%endif
+ mov tmpq, rsp
+ VP9_IDCT32_1D blockq, 1, 8
+
+%if ARCH_X86_32
+ DEFINE_ARGS dst, stride, stride30, dst_end, stride2, tmp
+ mov strideq, r1mp
+%define cntd dword r3m
+%endif
+ mov stride30q, strideq ; stride
+ lea stride2q, [strideq*2] ; stride*2
+ shl stride30q, 5 ; stride*32
+ mov cntd, 4
+ sub stride30q, stride2q ; stride*30
+.loop2_8x8:
+ mov dstq, dst_bakq
+ lea dst_endq, [dstq+stride30q]
+ VP9_IDCT32_1D tmpq, 2, 8
+ add dst_bakq, 8
+ add tmpq, 16
+ dec cntd
+ jg .loop2_8x8
+
+ ; at the end of the loop, m7 should still be zero
+ ; use that to zero out block coefficients
+%if ARCH_X86_32
+ DEFINE_ARGS block
+ mov blockq, r2mp
+%endif
+ ZERO_BLOCK blockq, 64, 8, m1
+ RET
+
+.idct16x16:
+%if ARCH_X86_32
+ DEFINE_ARGS block, tmp, cnt
+ mov blockq, r2mp
+%endif
+ mov cntd, 2
+ mov tmpq, rsp
+.loop1_16x16:
+ VP9_IDCT32_1D blockq, 1, 16
+ add blockq, 16
+ add tmpq, 512
+ dec cntd
+ jg .loop1_16x16
+
+%if ARCH_X86_64
+ sub blockq, 32
+%else
+ DEFINE_ARGS dst, stride, stride30, dst_end, stride2, tmp
+ mov strideq, r1mp
+%define cntd dword r3m
+%endif
+
+ mov stride30q, strideq ; stride
+ lea stride2q, [strideq*2] ; stride*2
+ shl stride30q, 5 ; stride*32
+ mov cntd, 4
+ mov tmpq, rsp
+ sub stride30q, stride2q ; stride*30
+.loop2_16x16:
+ mov dstq, dst_bakq
+ lea dst_endq, [dstq+stride30q]
+ VP9_IDCT32_1D tmpq, 2, 16
+ add dst_bakq, 8
+ add tmpq, 16
+ dec cntd
+ jg .loop2_16x16
+
+ ; at the end of the loop, m7 should still be zero
+ ; use that to zero out block coefficients
+%if ARCH_X86_32
+ DEFINE_ARGS block
+ mov blockq, r2mp
+%endif
+ ZERO_BLOCK blockq, 64, 16, m1
+ RET
+%endif
+
+.idctfull:
+%if ARCH_X86_32
+ DEFINE_ARGS block, tmp, cnt
+ mov blockq, r2mp
+%endif
+ mov cntd, 4
+ mov tmpq, rsp
+.loop1_full:
+ VP9_IDCT32_1D blockq, 1
+ add blockq, 16
+ add tmpq, 512
+ dec cntd
+ jg .loop1_full
+
+%if ARCH_X86_64
+ sub blockq, 64
+%else
+ DEFINE_ARGS dst, stride, stride30, dst_end, stride2, tmp
+ mov strideq, r1mp
+%define cntd dword r3m
+%endif
+
+ mov stride30q, strideq ; stride
+ lea stride2q, [strideq*2] ; stride*2
+ shl stride30q, 5 ; stride*32
+ mov cntd, 4
+ mov tmpq, rsp
+ sub stride30q, stride2q ; stride*30
+.loop2_full:
+ mov dstq, dst_bakq
+ lea dst_endq, [dstq+stride30q]
+ VP9_IDCT32_1D tmpq, 2
+ add dst_bakq, 8
+ add tmpq, 16
+ dec cntd
+ jg .loop2_full
+
+ ; at the end of the loop, m7 should still be zero
+ ; use that to zero out block coefficients
+%if ARCH_X86_32
+ DEFINE_ARGS block
+ mov blockq, r2mp
+%endif
+ ZERO_BLOCK blockq, 64, 32, m1
+ RET
+%endmacro
+
+VP9_IDCT_IDCT_32x32_ADD_XMM sse2
+VP9_IDCT_IDCT_32x32_ADD_XMM ssse3
+VP9_IDCT_IDCT_32x32_ADD_XMM avx
diff --git a/media/ffvpx/libavcodec/x86/vp9itxfm_16bpp.asm b/media/ffvpx/libavcodec/x86/vp9itxfm_16bpp.asm
new file mode 100644
index 000000000..902685edf
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp9itxfm_16bpp.asm
@@ -0,0 +1,2044 @@
+;******************************************************************************
+;* VP9 inverse transform x86 SIMD optimizations
+;*
+;* Copyright (C) 2015 Ronald S. Bultje <rsbultje gmail com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+%include "vp9itxfm_template.asm"
+
+SECTION_RODATA
+
+cextern pw_8
+cextern pw_1023
+cextern pw_2048
+cextern pw_4095
+cextern pw_m1
+cextern pd_1
+cextern pd_16
+cextern pd_32
+cextern pd_8192
+
+pd_8: times 4 dd 8
+pd_3fff: times 4 dd 0x3fff
+
+cextern pw_11585x2
+
+cextern pw_5283_13377
+cextern pw_9929_13377
+cextern pw_15212_m13377
+cextern pw_15212_9929
+cextern pw_m5283_m15212
+cextern pw_13377x2
+cextern pw_m13377_13377
+cextern pw_13377_0
+
+pw_9929_m5283: times 4 dw 9929, -5283
+
+%macro COEF_PAIR 2-3
+cextern pw_m%1_%2
+cextern pw_%2_%1
+%if %0 == 3
+cextern pw_m%1_m%2
+%if %1 != %2
+cextern pw_m%2_%1
+cextern pw_%1_%2
+%endif
+%endif
+%endmacro
+
+COEF_PAIR 2404, 16207
+COEF_PAIR 3196, 16069, 1
+COEF_PAIR 4756, 15679
+COEF_PAIR 5520, 15426
+COEF_PAIR 6270, 15137, 1
+COEF_PAIR 8423, 14053
+COEF_PAIR 10394, 12665
+COEF_PAIR 11003, 12140
+COEF_PAIR 11585, 11585, 1
+COEF_PAIR 13160, 9760
+COEF_PAIR 13623, 9102, 1
+COEF_PAIR 14449, 7723
+COEF_PAIR 14811, 7005
+COEF_PAIR 15893, 3981
+COEF_PAIR 16305, 1606
+COEF_PAIR 16364, 804
+
+default_8x8:
+times 12 db 1
+times 52 db 2
+row_8x8:
+times 18 db 1
+times 46 db 2
+col_8x8:
+times 6 db 1
+times 58 db 2
+default_16x16:
+times 10 db 1
+times 28 db 2
+times 51 db 3
+times 167 db 4
+row_16x16:
+times 21 db 1
+times 45 db 2
+times 60 db 3
+times 130 db 4
+col_16x16:
+times 5 db 1
+times 12 db 2
+times 25 db 3
+times 214 db 4
+default_32x32:
+times 9 db 1
+times 25 db 2
+times 36 db 3
+times 65 db 4
+times 105 db 5
+times 96 db 6
+times 112 db 7
+times 576 db 8
+
+SECTION .text
+
+%macro VP9_STORE_2X 6-7 dstq ; reg1, reg2, tmp1, tmp2, min, max, dst
+ mova m%3, [%7]
+ mova m%4, [%7+strideq]
+ paddw m%3, m%1
+ paddw m%4, m%2
+ pmaxsw m%3, m%5
+ pmaxsw m%4, m%5
+ pminsw m%3, m%6
+ pminsw m%4, m%6
+ mova [%7], m%3
+ mova [%7+strideq], m%4
+%endmacro
+
+%macro ZERO_BLOCK 4 ; mem, stride, nnzcpl, zero_reg
+%assign %%y 0
+%rep %3
+%assign %%x 0
+%rep %3*4/mmsize
+ mova [%1+%%y+%%x], %4
+%assign %%x (%%x+mmsize)
+%endrep
+%assign %%y (%%y+%2)
+%endrep
+%endmacro
+
+; the input coefficients are scaled up by 2 bit (which we downscale immediately
+; in the iwht), and is otherwise orthonormally increased by 1 bit per iwht_1d.
+; therefore, a diff of 10-12+sign bit will fit in 12-14+sign bit after scaling,
+; i.e. everything can be done in 15+1bpp words. Since the quant fractional bits
+; add 2 bits, we need to scale before converting to word in 12bpp, since the
+; input will be 16+sign bit which doesn't fit in 15+sign words, but in 10bpp
+; we can scale after converting to words (which is half the instructions),
+; since the input is only 14+sign bit, which fits in 15+sign words directly.
+
+%macro IWHT4_FN 2 ; bpp, max
+cglobal vp9_iwht_iwht_4x4_add_%1, 3, 3, 8, dst, stride, block, eob
+ mova m7, [pw_%2]
+ mova m0, [blockq+0*16+0]
+ mova m1, [blockq+1*16+0]
+%if %1 >= 12
+ mova m4, [blockq+0*16+8]
+ mova m5, [blockq+1*16+8]
+ psrad m0, 2
+ psrad m1, 2
+ psrad m4, 2
+ psrad m5, 2
+ packssdw m0, m4
+ packssdw m1, m5
+%else
+ packssdw m0, [blockq+0*16+8]
+ packssdw m1, [blockq+1*16+8]
+ psraw m0, 2
+ psraw m1, 2
+%endif
+ mova m2, [blockq+2*16+0]
+ mova m3, [blockq+3*16+0]
+%if %1 >= 12
+ mova m4, [blockq+2*16+8]
+ mova m5, [blockq+3*16+8]
+ psrad m2, 2
+ psrad m3, 2
+ psrad m4, 2
+ psrad m5, 2
+ packssdw m2, m4
+ packssdw m3, m5
+%else
+ packssdw m2, [blockq+2*16+8]
+ packssdw m3, [blockq+3*16+8]
+ psraw m2, 2
+ psraw m3, 2
+%endif
+
+ VP9_IWHT4_1D
+ TRANSPOSE4x4W 0, 1, 2, 3, 4
+ VP9_IWHT4_1D
+
+ pxor m6, m6
+ VP9_STORE_2X 0, 1, 4, 5, 6, 7
+ lea dstq, [dstq+strideq*2]
+ VP9_STORE_2X 2, 3, 4, 5, 6, 7
+ ZERO_BLOCK blockq, 16, 4, m6
+ RET
+%endmacro
+
+INIT_MMX mmxext
+IWHT4_FN 10, 1023
+INIT_MMX mmxext
+IWHT4_FN 12, 4095
+
+%macro VP9_IDCT4_WRITEOUT 0
+%if cpuflag(ssse3)
+ mova m5, [pw_2048]
+ pmulhrsw m0, m5
+ pmulhrsw m1, m5
+ pmulhrsw m2, m5
+ pmulhrsw m3, m5
+%else
+ mova m5, [pw_8]
+ paddw m0, m5
+ paddw m1, m5
+ paddw m2, m5
+ paddw m3, m5
+ psraw m0, 4
+ psraw m1, 4
+ psraw m2, 4
+ psraw m3, 4
+%endif
+ mova m5, [pw_1023]
+ VP9_STORE_2X 0, 1, 6, 7, 4, 5
+ lea dstq, [dstq+2*strideq]
+ VP9_STORE_2X 2, 3, 6, 7, 4, 5
+%endmacro
+
+%macro DC_ONLY 2 ; shift, zero
+ mov coefd, dword [blockq]
+ movd [blockq], %2
+ imul coefd, 11585
+ add coefd, 8192
+ sar coefd, 14
+ imul coefd, 11585
+ add coefd, ((1 << (%1 - 1)) << 14) + 8192
+ sar coefd, 14 + %1
+%endmacro
+
+; 4x4 coefficients are 5+depth+sign bits, so for 10bpp, everything still fits
+; in 15+1 words without additional effort, since the coefficients are 15bpp.
+
+%macro IDCT4_10_FN 0
+cglobal vp9_idct_idct_4x4_add_10, 4, 4, 8, dst, stride, block, eob
+ cmp eobd, 1
+ jg .idctfull
+
+ ; dc-only
+ pxor m4, m4
+%if cpuflag(ssse3)
+ movd m0, [blockq]
+ movd [blockq], m4
+ mova m5, [pw_11585x2]
+ pmulhrsw m0, m5
+ pmulhrsw m0, m5
+%else
+ DEFINE_ARGS dst, stride, block, coef
+ DC_ONLY 4, m4
+ movd m0, coefd
+%endif
+ pshufw m0, m0, 0
+ mova m5, [pw_1023]
+%if cpuflag(ssse3)
+ pmulhrsw m0, [pw_2048] ; (x*2048 + (1<<14))>>15 <=> (x+8)>>4
+%endif
+ VP9_STORE_2X 0, 0, 6, 7, 4, 5
+ lea dstq, [dstq+2*strideq]
+ VP9_STORE_2X 0, 0, 6, 7, 4, 5
+ RET
+
+.idctfull:
+ mova m0, [blockq+0*16+0]
+ mova m1, [blockq+1*16+0]
+ packssdw m0, [blockq+0*16+8]
+ packssdw m1, [blockq+1*16+8]
+ mova m2, [blockq+2*16+0]
+ mova m3, [blockq+3*16+0]
+ packssdw m2, [blockq+2*16+8]
+ packssdw m3, [blockq+3*16+8]
+
+%if cpuflag(ssse3)
+ mova m6, [pw_11585x2]
+%endif
+ mova m7, [pd_8192] ; rounding
+ VP9_IDCT4_1D
+ TRANSPOSE4x4W 0, 1, 2, 3, 4
+ VP9_IDCT4_1D
+
+ pxor m4, m4
+ ZERO_BLOCK blockq, 16, 4, m4
+ VP9_IDCT4_WRITEOUT
+ RET
+%endmacro
+
+INIT_MMX mmxext
+IDCT4_10_FN
+INIT_MMX ssse3
+IDCT4_10_FN
+
+%macro IADST4_FN 4
+cglobal vp9_%1_%3_4x4_add_10, 3, 3, 0, dst, stride, block, eob
+%if WIN64 && notcpuflag(ssse3)
+ WIN64_SPILL_XMM 8
+%endif
+ movdqa xmm5, [pd_8192]
+ mova m0, [blockq+0*16+0]
+ mova m1, [blockq+1*16+0]
+ packssdw m0, [blockq+0*16+8]
+ packssdw m1, [blockq+1*16+8]
+ mova m2, [blockq+2*16+0]
+ mova m3, [blockq+3*16+0]
+ packssdw m2, [blockq+2*16+8]
+ packssdw m3, [blockq+3*16+8]
+
+%if cpuflag(ssse3)
+ mova m6, [pw_11585x2]
+%endif
+%ifnidn %1%3, iadstiadst
+ movdq2q m7, xmm5
+%endif
+ VP9_%2_1D
+ TRANSPOSE4x4W 0, 1, 2, 3, 4
+ VP9_%4_1D
+
+ pxor m4, m4
+ ZERO_BLOCK blockq, 16, 4, m4
+ VP9_IDCT4_WRITEOUT
+ RET
+%endmacro
+
+; All idct/iadst 4x4 10bpp combinations for sse2 and ssse3 (mmx regs).
+INIT_MMX sse2
+IADST4_FN idct, IDCT4, iadst, IADST4
+IADST4_FN iadst, IADST4, idct, IDCT4
+IADST4_FN iadst, IADST4, iadst, IADST4
+
+INIT_MMX ssse3
+IADST4_FN idct, IDCT4, iadst, IADST4
+IADST4_FN iadst, IADST4, idct, IDCT4
+IADST4_FN iadst, IADST4, iadst, IADST4
+
+; inputs and outputs are dwords, coefficients are words
+;
+; dst1 = src1 * coef1 + src2 * coef2 + rnd >> 14
+; dst2 = src1 * coef2 - src2 * coef1 + rnd >> 14
+;
+; Each dword is split into its low 14 bits (masked) and the remaining
+; high bits (arithmetic >>14); the halves are multiplied separately with
+; word pmaddwd and the partial products recombined, so full precision is
+; kept while only using 16-bit multiplies.
+%macro SUMSUB_MUL 6-8 [pd_8192], [pd_3fff] ; src/dst 1-2, tmp1-2, coef1-2, rnd, mask
+ pand m%3, m%1, %8
+ pand m%4, m%2, %8
+ psrad m%1, 14
+ psrad m%2, 14
+ packssdw m%4, m%2
+ packssdw m%3, m%1
+ punpckhwd m%2, m%4, m%3
+ punpcklwd m%4, m%3
+ pmaddwd m%3, m%4, [pw_%6_%5]
+ pmaddwd m%1, m%2, [pw_%6_%5]
+ pmaddwd m%4, [pw_m%5_%6]
+ pmaddwd m%2, [pw_m%5_%6]
+ ; round the low-half products and add them to the high-half products
+ paddd m%3, %7
+ paddd m%4, %7
+ psrad m%3, 14
+ psrad m%4, 14
+ paddd m%1, m%3
+ paddd m%2, m%4
+%endmacro
+
+; 1-D 4-point IDCT on dword coefficients (high-bitdepth path).
+; Default operands: rnd/mask constants, in/out in m0-m3, tmps in m4-m5.
+%macro IDCT4_12BPP_1D 0-8 [pd_8192], [pd_3fff], 0, 1, 2, 3, 4, 5 ; rnd, mask, in/out0-3, tmp0-1
+ SUMSUB_MUL %3, %5, %7, %8, 11585, 11585, %1, %2
+ SUMSUB_MUL %4, %6, %7, %8, 15137, 6270, %1, %2
+ SUMSUB_BA d, %4, %3, %7
+ SUMSUB_BA d, %6, %5, %7
+ SWAP %4, %6, %3
+%endmacro
+
+; Add the words in reg1-2 to four rows of dst (two rows per register,
+; low/high halves) and clamp the sums to [min, max].
+%macro STORE_4x4 6 ; tmp1-2, reg1-2, min, max
+ movh m%1, [dstq+strideq*0]
+ movh m%2, [dstq+strideq*2]
+ movhps m%1, [dstq+strideq*1]
+ movhps m%2, [dstq+stride3q ]
+ paddw m%1, m%3
+ paddw m%2, m%4
+ pmaxsw m%1, %5
+ pmaxsw m%2, %5
+ pminsw m%1, %6
+ pminsw m%2, %6
+ movh [dstq+strideq*0], m%1
+ movhps [dstq+strideq*1], m%1
+ movh [dstq+strideq*2], m%2
+ movhps [dstq+stride3q ], m%2
+%endmacro
+
+; Round the dword results in reg1-4 ((x + rnd) >> shift), pack them to
+; words and add/clamp/store them into dst via STORE_4x4.
+%macro ROUND_AND_STORE_4x4 8 ; reg1-4, min, max, rnd, shift
+ paddd m%1, %7
+ paddd m%2, %7
+ paddd m%3, %7
+ paddd m%4, %7
+ psrad m%1, %8
+ psrad m%2, %8
+ psrad m%3, %8
+ psrad m%4, %8
+ packssdw m%1, m%2
+ packssdw m%3, m%4
+ STORE_4x4 %2, %4, %1, %3, %5, %6
+%endmacro
+
+; 12bpp 4x4 idct_idct: all arithmetic stays on dwords (at this depth the
+; coefficients no longer fit in words); output is clamped to [0, 4095].
+INIT_XMM sse2
+cglobal vp9_idct_idct_4x4_add_12, 4, 4, 8, dst, stride, block, eob
+ cmp eobd, 1
+ jg .idctfull
+
+ ; dc-only - this is special, since for 4x4 12bpp, the max coef size is
+ ; 17+sign bpp. Since the multiply is with 11585, which is 14bpp, the
+ ; result of each multiply is 31+sign bit, i.e. it _exactly_ fits in a
+ ; dword. After the final shift (4), the result is 13+sign bits, so we
+ ; don't need any additional processing to fit it in a word
+ DEFINE_ARGS dst, stride, block, coef
+ pxor m4, m4
+ DC_ONLY 4, m4
+ movd m0, coefd
+ pshuflw m0, m0, q0000
+ punpcklqdq m0, m0
+ mova m5, [pw_4095]
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+ STORE_4x4 1, 3, 0, 0, m4, m5
+ RET
+
+.idctfull:
+ DEFINE_ARGS dst, stride, block, eob
+ mova m0, [blockq+0*16]
+ mova m1, [blockq+1*16]
+ mova m2, [blockq+2*16]
+ mova m3, [blockq+3*16]
+ mova m6, [pd_8192]
+ mova m7, [pd_3fff]
+
+ IDCT4_12BPP_1D m6, m7
+ TRANSPOSE4x4D 0, 1, 2, 3, 4
+ IDCT4_12BPP_1D m6, m7
+
+ pxor m4, m4
+ ZERO_BLOCK blockq, 16, 4, m4
+
+ ; writeout
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+ mova m5, [pw_4095]
+ mova m6, [pd_8]
+ ROUND_AND_STORE_4x4 0, 1, 2, 3, m4, m5, m6, 4
+ RET
+
+; Spill m%1: on x86-64 swap it into high register m%2; on x86-32 store it
+; to stack slot %3. The optional 4th arg defines reg_%4 as the value's new
+; home so later code can reference it uniformly on both arches.
+%macro SCRATCH 3-4
+%if ARCH_X86_64
+ SWAP %1, %2
+%if %0 == 4
+%define reg_%4 m%2
+%endif
+%else
+ mova [%3], m%1
+%if %0 == 4
+%define reg_%4 [%3]
+%endif
+%endif
+%endmacro
+
+; Undo SCRATCH: swap back from m%2 (x86-64) or reload from stack slot %3
+; (x86-32); the optional 4th arg removes the reg_%4 alias.
+%macro UNSCRATCH 3-4
+%if ARCH_X86_64
+ SWAP %1, %2
+%else
+ mova m%1, [%3]
+%endif
+%if %0 == 4
+%undef reg_%4
+%endif
+%endmacro
+
+; Load constant [%2] into m%1 on x86-64; on x86-32 (no spare regs) define
+; reg_%3 as the memory operand itself so users read it in place.
+%macro PRELOAD 2-3
+%if ARCH_X86_64
+ mova m%1, [%2]
+%if %0 == 3
+%define reg_%3 m%1
+%endif
+%elif %0 == 3
+%define reg_%3 [%2]
+%endif
+%endmacro
+
+; out0 = 5283 * in0 + 13377 * in1 + 15212 * in2 + 9929 * in3 + rnd >> 14
+; out1 = 9929 * in0 + 13377 * in1 - 5283 * in2 - 15212 * in3 + rnd >> 14
+; out2 = 13377 * in0 - 13377 * in2 + 13377 * in3 + rnd >> 14
+; out3 = 15212 * in0 - 13377 * in1 + 9929 * in2 - 5283 * in3 + rnd >> 14
+; 1-D 4-point IADST on dword inputs in m0-m3 (formulas above). Uses the
+; same low-14-bit/high-bit word split as SUMSUB_MUL, with two scratch
+; slots (reg_a/reg_b) since all four word-packed halves stay live.
+%macro IADST4_12BPP_1D 0-2 [pd_8192], [pd_3fff] ; rnd, mask
+ pand m4, m0, %2
+ pand m5, m1, %2
+ psrad m0, 14
+ psrad m1, 14
+ packssdw m5, m1
+ packssdw m4, m0
+ punpckhwd m1, m4, m5
+ punpcklwd m4, m5
+ pand m5, m2, %2
+ pand m6, m3, %2
+ psrad m2, 14
+ psrad m3, 14
+ packssdw m6, m3
+ packssdw m5, m2
+ punpckhwd m3, m5, m6
+ punpcklwd m5, m6
+ SCRATCH 1, 8, rsp+0*mmsize, a
+ SCRATCH 5, 9, rsp+1*mmsize, b
+
+ ; m1/3 have the high bits of 0,1,2,3
+ ; m4/5 have the low bits of 0,1,2,3
+ ; m0/2/6/7 are free
+
+ mova m2, [pw_15212_9929]
+ mova m0, [pw_5283_13377]
+ pmaddwd m7, m2, reg_b
+ pmaddwd m6, m4, m0
+ pmaddwd m2, m3
+ pmaddwd m0, reg_a
+ paddd m6, m7
+ paddd m0, m2
+ mova m1, [pw_m13377_13377]
+ mova m5, [pw_13377_0]
+ pmaddwd m7, m1, reg_b
+ pmaddwd m2, m4, m5
+ pmaddwd m1, m3
+ pmaddwd m5, reg_a
+ paddd m2, m7
+ paddd m1, m5
+ ; round the low-half products and combine with the high-half products
+ paddd m6, %1
+ paddd m2, %1
+ psrad m6, 14
+ psrad m2, 14
+ paddd m0, m6 ; t0
+ paddd m2, m1 ; t2
+
+ mova m7, [pw_m5283_m15212]
+ mova m5, [pw_9929_13377]
+ pmaddwd m1, m7, reg_b
+ pmaddwd m6, m4, m5
+ pmaddwd m7, m3
+ pmaddwd m5, reg_a
+ paddd m6, m1
+ paddd m7, m5
+ UNSCRATCH 5, 9, rsp+1*mmsize, b
+ pmaddwd m5, [pw_9929_m5283]
+ pmaddwd m4, [pw_15212_m13377]
+ pmaddwd m3, [pw_9929_m5283]
+ UNSCRATCH 1, 8, rsp+0*mmsize, a
+ pmaddwd m1, [pw_15212_m13377]
+ paddd m4, m5
+ paddd m3, m1
+ paddd m6, %1
+ paddd m4, %1
+ psrad m6, 14
+ psrad m4, 14
+ paddd m7, m6 ; t1
+ paddd m3, m4 ; t3
+
+ SWAP 1, 7
+%endmacro
+
+; Emit vp9_%1_%3_4x4_add_12: row transform %2, column transform %4, all
+; on dword coefficients, with the output clamped to [0, 4095].
+%macro IADST4_12BPP_FN 4
+cglobal vp9_%1_%3_4x4_add_12, 3, 3, 12, 2 * ARCH_X86_32 * mmsize, dst, stride, block, eob
+ mova m0, [blockq+0*16]
+ mova m1, [blockq+1*16]
+ mova m2, [blockq+2*16]
+ mova m3, [blockq+3*16]
+
+ PRELOAD 10, pd_8192, rnd
+ PRELOAD 11, pd_3fff, mask
+ %2_12BPP_1D reg_rnd, reg_mask
+ TRANSPOSE4x4D 0, 1, 2, 3, 4
+ %4_12BPP_1D reg_rnd, reg_mask
+
+ pxor m4, m4
+ ZERO_BLOCK blockq, 16, 4, m4
+
+ ; writeout
+ DEFINE_ARGS dst, stride, stride3
+ lea stride3q, [strideq*3]
+ mova m5, [pw_4095]
+ mova m6, [pd_8]
+ ROUND_AND_STORE_4x4 0, 1, 2, 3, m4, m5, m6, 4
+ RET
+%endmacro
+
+; All idct/iadst 4x4 12bpp combinations (sse2 only).
+INIT_XMM sse2
+IADST4_12BPP_FN idct, IDCT4, iadst, IADST4
+IADST4_12BPP_FN iadst, IADST4, idct, IDCT4
+IADST4_12BPP_FN iadst, IADST4, iadst, IADST4
+
+; the following line has not been executed at the end of this macro:
+; UNSCRATCH 6, 8, rsp+(%5+0)*mmsize
+; 1-D 8-point IDCT over dword rows at %1 with stride %4: even rows go
+; through IDCT4_12BPP_1D, odd rows through the t4-t7 butterflies. Two
+; scratch slots at stack offset %5 are used; out6 stays scratched in
+; slot 8 (see note above).
+%macro IDCT8_1D 1-5 [pd_8192], [pd_3fff], 2 * mmsize, 17 ; src, rnd, mask, src_stride, stack_offset
+ mova m0, [%1+0*%4]
+ mova m2, [%1+2*%4]
+ mova m4, [%1+4*%4]
+ mova m6, [%1+6*%4]
+ IDCT4_12BPP_1D %2, %3, 0, 2, 4, 6, 1, 3 ; m0/2/4/6 have t0/1/2/3
+ SCRATCH 4, 8, rsp+(%5+0)*mmsize
+ SCRATCH 6, 9, rsp+(%5+1)*mmsize
+ mova m1, [%1+1*%4]
+ mova m3, [%1+3*%4]
+ mova m5, [%1+5*%4]
+ mova m7, [%1+7*%4]
+ SUMSUB_MUL 1, 7, 4, 6, 16069, 3196, %2, %3 ; m1=t7a, m7=t4a
+ SUMSUB_MUL 5, 3, 4, 6, 9102, 13623, %2, %3 ; m5=t6a, m3=t5a
+ SUMSUB_BA d, 3, 7, 4 ; m3=t4, m7=t5a
+ SUMSUB_BA d, 5, 1, 4 ; m5=t7, m1=t6a
+ SUMSUB_MUL 1, 7, 4, 6, 11585, 11585, %2, %3 ; m1=t6, m7=t5
+ SUMSUB_BA d, 5, 0, 4 ; m5=out0, m0=out7
+ SUMSUB_BA d, 1, 2, 4 ; m1=out1, m2=out6
+ UNSCRATCH 4, 8, rsp+(%5+0)*mmsize
+ UNSCRATCH 6, 9, rsp+(%5+1)*mmsize
+ SCRATCH 2, 8, rsp+(%5+0)*mmsize
+ SUMSUB_BA d, 7, 4, 2 ; m7=out2, m4=out5
+ SUMSUB_BA d, 3, 6, 2 ; m3=out3, m6=out4
+ SWAP 0, 5, 4, 6, 2, 7
+%endmacro
+
+; Add the words in m%3 to two 8-pixel rows at %6 (row step %7, defaults
+; dstq/strideq) and clamp to [min, max].
+%macro STORE_2x8 5-7 dstq, strideq ; tmp1-2, reg, min, max
+ mova m%1, [%6+%7*0]
+ mova m%2, [%6+%7*1]
+ paddw m%1, m%3
+ paddw m%2, m%3
+ pmaxsw m%1, %4
+ pmaxsw m%2, %4
+ pminsw m%1, %5
+ pminsw m%2, %5
+ mova [%6+%7*0], m%1
+ mova [%6+%7*1], m%2
+%endmacro
+
+; FIXME we can use the intermediate storage (rsp[0-15]) on x86-32 for temp
+; storage also instead of allocating two more stack spaces. This doesn't
+; matter much but it's something...
+;
+; 10bpp 8x8 idct_idct: dc-only shortcut, otherwise two IDCT8_1D passes
+; over 4-column slices with a transpose through the rsp row buffer. The
+; eob-indexed default_8x8 lookup decides how many slices of the first
+; pass are actually transformed; the rest are zero-filled.
+INIT_XMM sse2
+cglobal vp9_idct_idct_8x8_add_10, 4, 6 + ARCH_X86_64, 14, \
+ 16 * mmsize + 3 * ARCH_X86_32 * mmsize, \
+ dst, stride, block, eob
+ mova m0, [pw_1023]
+ cmp eobd, 1
+ jg .idctfull
+
+ ; dc-only - the 10bit version can be done entirely in 32bit, since the max
+ ; coef values are 16+sign bit, and the coef is 14bit, so 30+sign easily
+ ; fits in 32bit
+ DEFINE_ARGS dst, stride, block, coef
+ pxor m2, m2
+ DC_ONLY 5, m2
+ movd m1, coefd
+ pshuflw m1, m1, q0000
+ punpcklqdq m1, m1
+ DEFINE_ARGS dst, stride, cnt
+ mov cntd, 4
+.loop_dc:
+ STORE_2x8 3, 4, 1, m2, m0
+ lea dstq, [dstq+strideq*2]
+ dec cntd
+ jg .loop_dc
+ RET
+
+.idctfull:
+ SCRATCH 0, 12, rsp+16*mmsize, max
+ ; cnt (4th arg) aliases the eob argument here
+ DEFINE_ARGS dst, stride, block, cnt, ptr, skip, dstbak
+%if ARCH_X86_64
+ mov dstbakq, dstq
+ movsxd cntq, cntd
+%endif
+%ifdef PIC
+ lea ptrq, [default_8x8]
+ movzx cntd, byte [ptrq+cntq-1]
+%else
+ movzx cntd, byte [default_8x8+cntq-1]
+%endif
+ mov skipd, 2
+ sub skipd, cntd
+ mov ptrq, rsp
+ PRELOAD 10, pd_8192, rnd
+ PRELOAD 11, pd_3fff, mask
+ PRELOAD 13, pd_16, srnd
+.loop_1:
+ IDCT8_1D blockq, reg_rnd, reg_mask
+
+ TRANSPOSE4x4D 0, 1, 2, 3, 6
+ mova [ptrq+ 0*mmsize], m0
+ mova [ptrq+ 2*mmsize], m1
+ mova [ptrq+ 4*mmsize], m2
+ mova [ptrq+ 6*mmsize], m3
+ UNSCRATCH 6, 8, rsp+17*mmsize
+ TRANSPOSE4x4D 4, 5, 6, 7, 0
+ mova [ptrq+ 1*mmsize], m4
+ mova [ptrq+ 3*mmsize], m5
+ mova [ptrq+ 5*mmsize], m6
+ mova [ptrq+ 7*mmsize], m7
+ add ptrq, 8 * mmsize
+ add blockq, mmsize
+ dec cntd
+ jg .loop_1
+
+ ; zero-pad the remainder (skipped cols)
+ test skipd, skipd
+ jz .end
+ add skipd, skipd
+ lea blockq, [blockq+skipq*(mmsize/2)]
+ pxor m0, m0
+.loop_z:
+ mova [ptrq+mmsize*0], m0
+ mova [ptrq+mmsize*1], m0
+ mova [ptrq+mmsize*2], m0
+ mova [ptrq+mmsize*3], m0
+ add ptrq, 4 * mmsize
+ dec skipd
+ jg .loop_z
+.end:
+
+ DEFINE_ARGS dst, stride, block, cnt, ptr, stride3, dstbak
+ lea stride3q, [strideq*3]
+ mov cntd, 2
+ mov ptrq, rsp
+.loop_2:
+ IDCT8_1D ptrq, reg_rnd, reg_mask
+
+ pxor m6, m6
+ ROUND_AND_STORE_4x4 0, 1, 2, 3, m6, reg_max, reg_srnd, 5
+ lea dstq, [dstq+strideq*4]
+ UNSCRATCH 0, 8, rsp+17*mmsize
+ UNSCRATCH 1, 12, rsp+16*mmsize, max
+ UNSCRATCH 2, 13, pd_16, srnd
+ ROUND_AND_STORE_4x4 4, 5, 0, 7, m6, m1, m2, 5
+ add ptrq, 16
+%if ARCH_X86_64
+ lea dstq, [dstbakq+8]
+%else
+ mov dstq, dstm
+ add dstq, 8
+%endif
+ dec cntd
+ jg .loop_2
+
+ ; m6 is still zero
+ ZERO_BLOCK blockq-2*mmsize, 32, 8, m6
+ RET
+
+; Compute the dc value ((((coef*11585 + 8192) >> 14) * 11585 + rounder)
+; >> (14 + %1)) into coefd, and zero the dc coef in the block (%2 = zero
+; reg). On x86-64 this is done in one 64-bit gpr; on x86-32 each multiply
+; is split into low-14-bit and high parts so nothing overflows 32 bits.
+%macro DC_ONLY_64BIT 2 ; shift, zero
+%if ARCH_X86_64
+ movsxd coefq, dword [blockq]
+ movd [blockq], %2
+ imul coefq, 11585
+ add coefq, 8192
+ sar coefq, 14
+ imul coefq, 11585
+ add coefq, ((1 << (%1 - 1)) << 14) + 8192
+ sar coefq, 14 + %1
+%else
+ mov coefd, dword [blockq]
+ movd [blockq], %2
+ DEFINE_ARGS dst, stride, cnt, coef, coefl
+ mov cntd, 2
+.loop_dc_calc:
+ ; coef = (hi * 11585) + ((lo * 11585 + 8192) >> 14)
+ mov coefld, coefd
+ sar coefd, 14
+ and coefld, 0x3fff
+ imul coefd, 11585
+ imul coefld, 11585
+ add coefld, 8192
+ sar coefld, 14
+ add coefd, coefld
+ dec cntd
+ jg .loop_dc_calc
+ add coefd, 1 << (%1 - 1)
+ sar coefd, %1
+%endif
+%endmacro
+
+; 12bpp 8x8 idct_idct: shares the full path with the 10bpp version (jumps
+; into its .idctfull with the 4095 clamp preloaded in m0); only the
+; dc-only path differs, needing 64-bit-safe multiplies (DC_ONLY_64BIT).
+INIT_XMM sse2
+cglobal vp9_idct_idct_8x8_add_12, 4, 6 + ARCH_X86_64, 14, \
+ 16 * mmsize + 3 * ARCH_X86_32 * mmsize, \
+ dst, stride, block, eob
+ mova m0, [pw_4095]
+ cmp eobd, 1
+ jg mangle(private_prefix %+ _ %+ vp9_idct_idct_8x8_add_10 %+ SUFFIX).idctfull
+
+ ; dc-only - unfortunately, this one can overflow, since coefs are 18+sign
+ ; bpp, and 18+14+sign does not fit in 32bit, so we do 2-stage multiplies
+ DEFINE_ARGS dst, stride, block, coef, coefl
+ pxor m2, m2
+ DC_ONLY_64BIT 5, m2
+ movd m1, coefd
+ pshuflw m1, m1, q0000
+ punpcklqdq m1, m1
+ DEFINE_ARGS dst, stride, cnt
+ mov cntd, 4
+.loop_dc:
+ STORE_2x8 3, 4, 1, m2, m0
+ lea dstq, [dstq+strideq*2]
+ dec cntd
+ jg .loop_dc
+ RET
+
+; inputs and outputs are dwords, coefficients are words
+;
+; dst1[hi]:dst3[lo] = src1 * coef1 + src2 * coef2
+; dst2[hi]:dst4[lo] = src1 * coef2 - src2 * coef1
+;
+; Like SUMSUB_MUL but leaves the four partial products split (high parts
+; in dst1-2, low parts in dst3-4) so callers can defer the round-and-add
+; (see SUMSUB_PACK_D).
+%macro SUMSUB_MUL_D 6-7 [pd_3fff] ; src/dst 1-2, dst3-4, coef1-2, mask
+ pand m%3, m%1, %7
+ pand m%4, m%2, %7
+ psrad m%1, 14
+ psrad m%2, 14
+ packssdw m%4, m%2
+ packssdw m%3, m%1
+ punpckhwd m%2, m%4, m%3
+ punpcklwd m%4, m%3
+ pmaddwd m%3, m%4, [pw_%6_%5]
+ pmaddwd m%1, m%2, [pw_%6_%5]
+ pmaddwd m%4, [pw_m%5_%6]
+ pmaddwd m%2, [pw_m%5_%6]
+%endmacro
+
+; dst1 = src2[hi]:src4[lo] + src1[hi]:src3[lo] + rnd >> 14
+; dst2 = src2[hi]:src4[lo] - src1[hi]:src3[lo] + rnd >> 14
+;
+; Combines the split hi/lo partial products from SUMSUB_MUL_D: butterfly
+; the pairs, then round the low parts and add them onto the high parts.
+%macro SUMSUB_PACK_D 5-6 [pd_8192] ; src/dst 1-2, src3-4, tmp, rnd
+ SUMSUB_BA d, %1, %2, %5
+ SUMSUB_BA d, %3, %4, %5
+ paddd m%3, %6
+ paddd m%4, %6
+ psrad m%3, 14
+ psrad m%4, 14
+ paddd m%1, m%3
+ paddd m%2, m%4
+%endmacro
+
+; Negate packed dwords in %1: psignd by -1 on ssse3, otherwise two's
+; complement (invert, then add 1).
+%macro NEGD 1
+%if cpuflag(ssse3)
+ psignd %1, [pw_m1]
+%else
+ pxor %1, [pw_m1]
+ paddd %1, [pd_1]
+%endif
+%endmacro
+
+; the following line has not been executed at the end of this macro:
+; UNSCRATCH 6, 8, rsp+17*mmsize
+;
+; 1-D 8-point IADST over dword rows at %1 (fixed 2*mmsize stride). Heavy
+; on scratch slots (r17-21 / m8-12) since nearly all 8 values stay live
+; through the butterfly stages; out6 is left scratched in slot 8.
+%macro IADST8_1D 1-3 [pd_8192], [pd_3fff] ; src, rnd, mask
+ mova m0, [%1+ 0*mmsize]
+ mova m3, [%1+ 6*mmsize]
+ mova m4, [%1+ 8*mmsize]
+ mova m7, [%1+14*mmsize]
+ SUMSUB_MUL_D 7, 0, 1, 2, 16305, 1606, %3 ; m7/1=t0a, m0/2=t1a
+ SUMSUB_MUL_D 3, 4, 5, 6, 10394, 12665, %3 ; m3/5=t4a, m4/6=t5a
+ SCRATCH 0, 8, rsp+17*mmsize
+ SUMSUB_PACK_D 3, 7, 5, 1, 0, %2 ; m3=t0, m7=t4
+ UNSCRATCH 0, 8, rsp+17*mmsize
+ SUMSUB_PACK_D 4, 0, 6, 2, 1, %2 ; m4=t1, m0=t5
+
+ SCRATCH 3, 8, rsp+17*mmsize
+ SCRATCH 4, 9, rsp+18*mmsize
+ SCRATCH 7, 10, rsp+19*mmsize
+ SCRATCH 0, 11, rsp+20*mmsize
+
+ mova m1, [%1+ 2*mmsize]
+ mova m2, [%1+ 4*mmsize]
+ mova m5, [%1+10*mmsize]
+ mova m6, [%1+12*mmsize]
+ SUMSUB_MUL_D 5, 2, 3, 4, 14449, 7723, %3 ; m5/8=t2a, m2/9=t3a
+ SUMSUB_MUL_D 1, 6, 7, 0, 4756, 15679, %3 ; m1/10=t6a, m6/11=t7a
+ SCRATCH 2, 12, rsp+21*mmsize
+ SUMSUB_PACK_D 1, 5, 7, 3, 2, %2 ; m1=t2, m5=t6
+ UNSCRATCH 2, 12, rsp+21*mmsize
+ SUMSUB_PACK_D 6, 2, 0, 4, 3, %2 ; m6=t3, m2=t7
+
+ UNSCRATCH 7, 10, rsp+19*mmsize
+ UNSCRATCH 0, 11, rsp+20*mmsize
+ SCRATCH 1, 10, rsp+19*mmsize
+ SCRATCH 6, 11, rsp+20*mmsize
+
+ SUMSUB_MUL_D 7, 0, 3, 4, 15137, 6270, %3 ; m7/8=t4a, m0/9=t5a
+ SUMSUB_MUL_D 2, 5, 1, 6, 6270, 15137, %3 ; m2/10=t7a, m5/11=t6a
+ SCRATCH 2, 12, rsp+21*mmsize
+ SUMSUB_PACK_D 5, 7, 6, 3, 2, %2 ; m5=-out1, m7=t6
+ UNSCRATCH 2, 12, rsp+21*mmsize
+ NEGD m5 ; m5=out1
+ SUMSUB_PACK_D 2, 0, 1, 4, 3, %2 ; m2=out6, m0=t7
+ SUMSUB_MUL 7, 0, 3, 4, 11585, 11585, %2, %3 ; m7=out2, m0=-out5
+ NEGD m0 ; m0=out5
+
+ UNSCRATCH 3, 8, rsp+17*mmsize
+ UNSCRATCH 4, 9, rsp+18*mmsize
+ UNSCRATCH 1, 10, rsp+19*mmsize
+ UNSCRATCH 6, 11, rsp+20*mmsize
+ SCRATCH 2, 8, rsp+17*mmsize
+ SCRATCH 0, 9, rsp+18*mmsize
+
+ SUMSUB_BA d, 1, 3, 2 ; m1=out0, m3=t2
+ SUMSUB_BA d, 6, 4, 2 ; m6=-out7, m4=t3
+ NEGD m6 ; m6=out7
+ SUMSUB_MUL 3, 4, 2, 0, 11585, 11585, %2, %3 ; m3=-out3, m4=out4
+ NEGD m3 ; m3=out3
+
+ UNSCRATCH 0, 9, rsp+18*mmsize
+
+ SWAP 0, 1, 5
+ SWAP 2, 7, 6
+%endmacro
+
+; Emit vp9_%1_%3_8x8_add_{10,12}: row transform %2, col transform %4, eob
+; lookup table selected by %5 (row/col/default). The 12bpp entry only
+; swaps the clamp constant and jumps into the 10bpp .body.
+%macro IADST8_FN 5
+cglobal vp9_%1_%3_8x8_add_10, 4, 6 + ARCH_X86_64, 16, \
+ 16 * mmsize + ARCH_X86_32 * 6 * mmsize, \
+ dst, stride, block, eob
+ mova m0, [pw_1023]
+
+.body:
+ SCRATCH 0, 13, rsp+16*mmsize, max
+ ; cnt (4th arg) aliases the eob argument here
+ DEFINE_ARGS dst, stride, block, cnt, ptr, skip, dstbak
+%if ARCH_X86_64
+ mov dstbakq, dstq
+ movsxd cntq, cntd
+%endif
+%ifdef PIC
+ lea ptrq, [%5_8x8]
+ movzx cntd, byte [ptrq+cntq-1]
+%else
+ movzx cntd, byte [%5_8x8+cntq-1]
+%endif
+ mov skipd, 2
+ sub skipd, cntd
+ mov ptrq, rsp
+ PRELOAD 14, pd_8192, rnd
+ PRELOAD 15, pd_3fff, mask
+.loop_1:
+ %2_1D blockq, reg_rnd, reg_mask
+
+ TRANSPOSE4x4D 0, 1, 2, 3, 6
+ mova [ptrq+ 0*mmsize], m0
+ mova [ptrq+ 2*mmsize], m1
+ mova [ptrq+ 4*mmsize], m2
+ mova [ptrq+ 6*mmsize], m3
+ UNSCRATCH 6, 8, rsp+17*mmsize
+ TRANSPOSE4x4D 4, 5, 6, 7, 0
+ mova [ptrq+ 1*mmsize], m4
+ mova [ptrq+ 3*mmsize], m5
+ mova [ptrq+ 5*mmsize], m6
+ mova [ptrq+ 7*mmsize], m7
+ add ptrq, 8 * mmsize
+ add blockq, mmsize
+ dec cntd
+ jg .loop_1
+
+ ; zero-pad the remainder (skipped cols)
+ test skipd, skipd
+ jz .end
+ add skipd, skipd
+ lea blockq, [blockq+skipq*(mmsize/2)]
+ pxor m0, m0
+.loop_z:
+ mova [ptrq+mmsize*0], m0
+ mova [ptrq+mmsize*1], m0
+ mova [ptrq+mmsize*2], m0
+ mova [ptrq+mmsize*3], m0
+ add ptrq, 4 * mmsize
+ dec skipd
+ jg .loop_z
+.end:
+
+ DEFINE_ARGS dst, stride, block, cnt, ptr, stride3, dstbak
+ lea stride3q, [strideq*3]
+ mov cntd, 2
+ mov ptrq, rsp
+.loop_2:
+ %4_1D ptrq, reg_rnd, reg_mask
+
+ pxor m6, m6
+ PRELOAD 9, pd_16, srnd
+ ROUND_AND_STORE_4x4 0, 1, 2, 3, m6, reg_max, reg_srnd, 5
+ lea dstq, [dstq+strideq*4]
+ UNSCRATCH 0, 8, rsp+17*mmsize
+ UNSCRATCH 1, 13, rsp+16*mmsize, max
+ UNSCRATCH 2, 9, pd_16, srnd
+ ROUND_AND_STORE_4x4 4, 5, 0, 7, m6, m1, m2, 5
+ add ptrq, 16
+%if ARCH_X86_64
+ lea dstq, [dstbakq+8]
+%else
+ mov dstq, dstm
+ add dstq, 8
+%endif
+ dec cntd
+ jg .loop_2
+
+ ; m6 is still zero
+ ZERO_BLOCK blockq-2*mmsize, 32, 8, m6
+ RET
+
+cglobal vp9_%1_%3_8x8_add_12, 4, 6 + ARCH_X86_64, 16, \
+ 16 * mmsize + ARCH_X86_32 * 6 * mmsize, \
+ dst, stride, block, eob
+ mova m0, [pw_4095]
+ jmp mangle(private_prefix %+ _ %+ vp9_%1_%3_8x8_add_10 %+ SUFFIX).body
+%endmacro
+
+; idct/iadst 8x8 combinations (sse2); pure idct_idct lives above.
+INIT_XMM sse2
+IADST8_FN idct, IDCT8, iadst, IADST8, row
+IADST8_FN iadst, IADST8, idct, IDCT8, col
+IADST8_FN iadst, IADST8, iadst, IADST8, default
+
+; 1-D 16-point IDCT: even inputs via IDCT8_1D, odd inputs through the
+; t8-t15 butterflies. %3/%4 locate the stack scratch areas (they differ
+; between the 16x16 and 32x32 passes); the output register/stack layout
+; is listed at the end of the macro.
+%macro IDCT16_1D 1-4 4 * mmsize, 65, 67 ; src, src_stride, stack_offset, mm32bit_stack_offset
+ IDCT8_1D %1, [pd_8192], [pd_3fff], %2 * 2, %4 ; m0-3=t0-3a, m4-5/m8|r67/m7=t4-7
+ ; SCRATCH 6, 8, rsp+(%4+0)*mmsize ; t6
+ SCRATCH 0, 15, rsp+(%4+7)*mmsize ; t0a
+ SCRATCH 1, 14, rsp+(%4+6)*mmsize ; t1a
+ SCRATCH 2, 13, rsp+(%4+5)*mmsize ; t2a
+ SCRATCH 3, 12, rsp+(%4+4)*mmsize ; t3a
+ SCRATCH 4, 11, rsp+(%4+3)*mmsize ; t4
+ mova [rsp+(%3+0)*mmsize], m5 ; t5
+ mova [rsp+(%3+1)*mmsize], m7 ; t7
+
+ mova m0, [%1+ 1*%2] ; in1
+ mova m3, [%1+ 7*%2] ; in7
+ mova m4, [%1+ 9*%2] ; in9
+ mova m7, [%1+15*%2] ; in15
+
+ SUMSUB_MUL 0, 7, 1, 2, 16305, 1606 ; m0=t15a, m7=t8a
+ SUMSUB_MUL 4, 3, 1, 2, 10394, 12665 ; m4=t14a, m3=t9a
+ SUMSUB_BA d, 3, 7, 1 ; m3=t8, m7=t9
+ SUMSUB_BA d, 4, 0, 1 ; m4=t15,m0=t14
+ SUMSUB_MUL 0, 7, 1, 2, 15137, 6270 ; m0=t14a, m7=t9a
+
+ mova m1, [%1+ 3*%2] ; in3
+ mova m2, [%1+ 5*%2] ; in5
+ mova m5, [%1+11*%2] ; in11
+ mova m6, [%1+13*%2] ; in13
+
+ SCRATCH 0, 9, rsp+(%4+1)*mmsize
+ SCRATCH 7, 10, rsp+(%4+2)*mmsize
+
+ SUMSUB_MUL 2, 5, 0, 7, 14449, 7723 ; m2=t13a, m5=t10a
+ SUMSUB_MUL 6, 1, 0, 7, 4756, 15679 ; m6=t12a, m1=t11a
+ SUMSUB_BA d, 5, 1, 0 ; m5=t11,m1=t10
+ SUMSUB_BA d, 2, 6, 0 ; m2=t12,m6=t13
+ NEGD m1 ; m1=-t10
+ SUMSUB_MUL 1, 6, 0, 7, 15137, 6270 ; m1=t13a, m6=t10a
+
+ UNSCRATCH 7, 10, rsp+(%4+2)*mmsize
+ SUMSUB_BA d, 5, 3, 0 ; m5=t8a, m3=t11a
+ SUMSUB_BA d, 6, 7, 0 ; m6=t9, m7=t10
+ SUMSUB_BA d, 2, 4, 0 ; m2=t15a,m4=t12a
+ SCRATCH 5, 10, rsp+(%4+2)*mmsize
+ SUMSUB_MUL 4, 3, 0, 5, 11585, 11585 ; m4=t12, m3=t11
+ UNSCRATCH 0, 9, rsp+(%4+1)*mmsize
+ SUMSUB_BA d, 1, 0, 5 ; m1=t14, m0=t13
+ SCRATCH 6, 9, rsp+(%4+1)*mmsize
+ SUMSUB_MUL 0, 7, 6, 5, 11585, 11585 ; m0=t13a,m7=t10a
+
+ ; order: 15|r74,14|r73,13|r72,12|r71,11|r70,r65,8|r67,r66,10|r69,9|r68,7,3,4,0,1,2
+ ; free: 6,5
+
+ UNSCRATCH 5, 15, rsp+(%4+7)*mmsize
+ SUMSUB_BA d, 2, 5, 6 ; m2=out0, m5=out15
+ SCRATCH 5, 15, rsp+(%4+7)*mmsize
+ UNSCRATCH 5, 14, rsp+(%4+6)*mmsize
+ SUMSUB_BA d, 1, 5, 6 ; m1=out1, m5=out14
+ SCRATCH 5, 14, rsp+(%4+6)*mmsize
+ UNSCRATCH 5, 13, rsp+(%4+5)*mmsize
+ SUMSUB_BA d, 0, 5, 6 ; m0=out2, m5=out13
+ SCRATCH 5, 13, rsp+(%4+5)*mmsize
+ UNSCRATCH 5, 12, rsp+(%4+4)*mmsize
+ SUMSUB_BA d, 4, 5, 6 ; m4=out3, m5=out12
+ SCRATCH 5, 12, rsp+(%4+4)*mmsize
+ UNSCRATCH 5, 11, rsp+(%4+3)*mmsize
+ SUMSUB_BA d, 3, 5, 6 ; m3=out4, m5=out11
+ SCRATCH 4, 11, rsp+(%4+3)*mmsize
+ mova m4, [rsp+(%3+0)*mmsize]
+ SUMSUB_BA d, 7, 4, 6 ; m7=out5, m4=out10
+ mova [rsp+(%3+0)*mmsize], m5
+ UNSCRATCH 5, 8, rsp+(%4+0)*mmsize
+ UNSCRATCH 6, 9, rsp+(%4+1)*mmsize
+ SCRATCH 2, 8, rsp+(%4+0)*mmsize
+ SCRATCH 1, 9, rsp+(%4+1)*mmsize
+ UNSCRATCH 1, 10, rsp+(%4+2)*mmsize
+ SCRATCH 0, 10, rsp+(%4+2)*mmsize
+ mova m0, [rsp+(%3+1)*mmsize]
+ SUMSUB_BA d, 6, 5, 2 ; m6=out6, m5=out9
+ SUMSUB_BA d, 1, 0, 2 ; m1=out7, m0=out8
+
+ SWAP 0, 3, 1, 7, 2, 6, 4
+
+ ; output order: 8-11|r67-70=out0-3
+ ; 0-6,r65=out4-11
+ ; 12-15|r71-74=out12-15
+%endmacro
+
+; 10bpp 16x16 idct_idct: dc-only shortcut, otherwise two IDCT16_1D passes
+; over 4-column slices with transposes through the rsp row buffer; the
+; slice count comes from the default_16x16 eob lookup, remaining slices
+; are zero-filled.
+INIT_XMM sse2
+cglobal vp9_idct_idct_16x16_add_10, 4, 6 + ARCH_X86_64, 16, \
+ 67 * mmsize + ARCH_X86_32 * 8 * mmsize, \
+ dst, stride, block, eob
+ mova m0, [pw_1023]
+ cmp eobd, 1
+ jg .idctfull
+
+ ; dc-only - the 10bit version can be done entirely in 32bit, since the max
+ ; coef values are 17+sign bit, and the coef is 14bit, so 31+sign easily
+ ; fits in 32bit
+ DEFINE_ARGS dst, stride, block, coef
+ pxor m2, m2
+ DC_ONLY 6, m2
+ movd m1, coefd
+ pshuflw m1, m1, q0000
+ punpcklqdq m1, m1
+ DEFINE_ARGS dst, stride, cnt
+ mov cntd, 8
+.loop_dc:
+ STORE_2x8 3, 4, 1, m2, m0, dstq, mmsize
+ STORE_2x8 3, 4, 1, m2, m0, dstq+strideq, mmsize
+ lea dstq, [dstq+strideq*2]
+ dec cntd
+ jg .loop_dc
+ RET
+
+.idctfull:
+ mova [rsp+64*mmsize], m0
+ ; cnt (4th arg) aliases the eob argument here
+ DEFINE_ARGS dst, stride, block, cnt, ptr, skip, dstbak
+%if ARCH_X86_64
+ mov dstbakq, dstq
+ movsxd cntq, cntd
+%endif
+%ifdef PIC
+ lea ptrq, [default_16x16]
+ movzx cntd, byte [ptrq+cntq-1]
+%else
+ movzx cntd, byte [default_16x16+cntq-1]
+%endif
+ mov skipd, 4
+ sub skipd, cntd
+ mov ptrq, rsp
+.loop_1:
+ IDCT16_1D blockq
+
+ TRANSPOSE4x4D 0, 1, 2, 3, 7
+ mova [ptrq+ 1*mmsize], m0
+ mova [ptrq+ 5*mmsize], m1
+ mova [ptrq+ 9*mmsize], m2
+ mova [ptrq+13*mmsize], m3
+ mova m7, [rsp+65*mmsize]
+ TRANSPOSE4x4D 4, 5, 6, 7, 0
+ mova [ptrq+ 2*mmsize], m4
+ mova [ptrq+ 6*mmsize], m5
+ mova [ptrq+10*mmsize], m6
+ mova [ptrq+14*mmsize], m7
+ UNSCRATCH 0, 8, rsp+67*mmsize
+ UNSCRATCH 1, 9, rsp+68*mmsize
+ UNSCRATCH 2, 10, rsp+69*mmsize
+ UNSCRATCH 3, 11, rsp+70*mmsize
+ TRANSPOSE4x4D 0, 1, 2, 3, 7
+ mova [ptrq+ 0*mmsize], m0
+ mova [ptrq+ 4*mmsize], m1
+ mova [ptrq+ 8*mmsize], m2
+ mova [ptrq+12*mmsize], m3
+ UNSCRATCH 4, 12, rsp+71*mmsize
+ UNSCRATCH 5, 13, rsp+72*mmsize
+ UNSCRATCH 6, 14, rsp+73*mmsize
+ UNSCRATCH 7, 15, rsp+74*mmsize
+ TRANSPOSE4x4D 4, 5, 6, 7, 0
+ mova [ptrq+ 3*mmsize], m4
+ mova [ptrq+ 7*mmsize], m5
+ mova [ptrq+11*mmsize], m6
+ mova [ptrq+15*mmsize], m7
+ add ptrq, 16 * mmsize
+ add blockq, mmsize
+ dec cntd
+ jg .loop_1
+
+ ; zero-pad the remainder (skipped cols)
+ test skipd, skipd
+ jz .end
+ add skipd, skipd
+ lea blockq, [blockq+skipq*(mmsize/2)]
+ pxor m0, m0
+.loop_z:
+ mova [ptrq+mmsize*0], m0
+ mova [ptrq+mmsize*1], m0
+ mova [ptrq+mmsize*2], m0
+ mova [ptrq+mmsize*3], m0
+ mova [ptrq+mmsize*4], m0
+ mova [ptrq+mmsize*5], m0
+ mova [ptrq+mmsize*6], m0
+ mova [ptrq+mmsize*7], m0
+ add ptrq, 8 * mmsize
+ dec skipd
+ jg .loop_z
+.end:
+
+ DEFINE_ARGS dst, stride, block, cnt, ptr, stride3, dstbak
+ lea stride3q, [strideq*3]
+ mov cntd, 4
+ mov ptrq, rsp
+.loop_2:
+ IDCT16_1D ptrq
+
+ pxor m7, m7
+ lea dstq, [dstq+strideq*4]
+ ROUND_AND_STORE_4x4 0, 1, 2, 3, m7, [rsp+64*mmsize], [pd_32], 6
+ lea dstq, [dstq+strideq*4]
+ mova m0, [rsp+65*mmsize]
+ mova m1, [rsp+64*mmsize]
+ mova m2, [pd_32]
+ ROUND_AND_STORE_4x4 4, 5, 6, 0, m7, m1, m2, 6
+
+%if ARCH_X86_64
+ DEFINE_ARGS dstbak, stride, block, cnt, ptr, stride3, dst
+%else
+ mov dstq, dstm
+%endif
+ UNSCRATCH 0, 8, rsp+67*mmsize
+ UNSCRATCH 4, 9, rsp+68*mmsize
+ UNSCRATCH 5, 10, rsp+69*mmsize
+ UNSCRATCH 3, 11, rsp+70*mmsize
+ ROUND_AND_STORE_4x4 0, 4, 5, 3, m7, m1, m2, 6
+%if ARCH_X86_64
+ DEFINE_ARGS dst, stride, block, cnt, ptr, stride3, dstbak
+ lea dstq, [dstbakq+stride3q*4]
+%else
+ lea dstq, [dstq+stride3q*4]
+%endif
+ UNSCRATCH 4, 12, rsp+71*mmsize
+ UNSCRATCH 5, 13, rsp+72*mmsize
+ UNSCRATCH 6, 14, rsp+73*mmsize
+ UNSCRATCH 0, 15, rsp+74*mmsize
+ ROUND_AND_STORE_4x4 4, 5, 6, 0, m7, m1, m2, 6
+
+ add ptrq, mmsize
+%if ARCH_X86_64
+ add dstbakq, 8
+ mov dstq, dstbakq
+%else
+ add dword dstm, 8
+ mov dstq, dstm
+%endif
+ dec cntd
+ jg .loop_2
+
+ ; m7 is still zero
+ ZERO_BLOCK blockq-4*mmsize, 64, 16, m7
+ RET
+
+; 12bpp 16x16 idct_idct: shares the full path with the 10bpp version
+; (jumps into its .idctfull with the 4095 clamp in m0); only the dc-only
+; path differs, needing 64-bit-safe multiplies (DC_ONLY_64BIT).
+INIT_XMM sse2
+cglobal vp9_idct_idct_16x16_add_12, 4, 6 + ARCH_X86_64, 16, \
+ 67 * mmsize + ARCH_X86_32 * 8 * mmsize, \
+ dst, stride, block, eob
+ mova m0, [pw_4095]
+ cmp eobd, 1
+ jg mangle(private_prefix %+ _ %+ vp9_idct_idct_16x16_add_10 %+ SUFFIX).idctfull
+
+ ; dc-only - unfortunately, this one can overflow, since coefs are 19+sign
+ ; bpp, and 19+14+sign does not fit in 32bit, so we do 2-stage multiplies
+ DEFINE_ARGS dst, stride, block, coef, coefl
+ pxor m2, m2
+ DC_ONLY_64BIT 6, m2
+ movd m1, coefd
+ pshuflw m1, m1, q0000
+ punpcklqdq m1, m1
+ DEFINE_ARGS dst, stride, cnt
+ mov cntd, 8
+.loop_dc:
+ STORE_2x8 3, 4, 1, m2, m0, dstq, mmsize
+ STORE_2x8 3, 4, 1, m2, m0, dstq+strideq, mmsize
+ lea dstq, [dstq+strideq*2]
+ dec cntd
+ jg .loop_dc
+ RET
+
+; r65-69 are available for spills
+; r70-77 are available on x86-32 only (x86-64 should use m8-15)
+; output should be in m8-11|r70-73, m0-6,r65 and m12-15|r74-77
+;
+; 1-D 16-point IADST over dword rows at %1 (fixed 4*mmsize stride), using
+; the split-product SUMSUB_MUL_D/SUMSUB_PACK_D pipeline and aggressive
+; SCRATCH spilling to keep 16 live dword lanes in 8 (x86-32) or 16
+; (x86-64) registers.
+%macro IADST16_1D 1 ; src
+ mova m0, [%1+ 0*4*mmsize] ; in0
+ mova m1, [%1+ 7*4*mmsize] ; in7
+ mova m2, [%1+ 8*4*mmsize] ; in8
+ mova m3, [%1+15*4*mmsize] ; in15
+ SUMSUB_MUL_D 3, 0, 4, 5, 16364, 804 ; m3/4=t0, m0/5=t1
+ SUMSUB_MUL_D 1, 2, 6, 7, 11003, 12140 ; m1/6=t8, m2/7=t9
+ SCRATCH 0, 8, rsp+70*mmsize
+ SUMSUB_PACK_D 1, 3, 6, 4, 0 ; m1=t0a, m3=t8a
+ UNSCRATCH 0, 8, rsp+70*mmsize
+ SUMSUB_PACK_D 2, 0, 7, 5, 4 ; m2=t1a, m0=t9a
+ mova [rsp+67*mmsize], m1
+ SCRATCH 2, 9, rsp+71*mmsize
+ SCRATCH 3, 12, rsp+74*mmsize
+ SCRATCH 0, 13, rsp+75*mmsize
+
+ mova m0, [%1+ 3*4*mmsize] ; in3
+ mova m1, [%1+ 4*4*mmsize] ; in4
+ mova m2, [%1+11*4*mmsize] ; in11
+ mova m3, [%1+12*4*mmsize] ; in12
+ SUMSUB_MUL_D 2, 1, 4, 5, 14811, 7005 ; m2/4=t4, m1/5=t5
+ SUMSUB_MUL_D 0, 3, 6, 7, 5520, 15426 ; m0/6=t12, m3/7=t13
+ SCRATCH 1, 10, rsp+72*mmsize
+ SUMSUB_PACK_D 0, 2, 6, 4, 1 ; m0=t4a, m2=t12a
+ UNSCRATCH 1, 10, rsp+72*mmsize
+ SUMSUB_PACK_D 3, 1, 7, 5, 4 ; m3=t5a, m1=t13a
+ SCRATCH 0, 15, rsp+77*mmsize
+ SCRATCH 3, 11, rsp+73*mmsize
+
+ UNSCRATCH 0, 12, rsp+74*mmsize ; t8a
+ UNSCRATCH 3, 13, rsp+75*mmsize ; t9a
+ SUMSUB_MUL_D 0, 3, 4, 5, 16069, 3196 ; m0/4=t8, m3/5=t9
+ SUMSUB_MUL_D 1, 2, 6, 7, 3196, 16069 ; m1/6=t13, m2/7=t12
+ SCRATCH 1, 12, rsp+74*mmsize
+ SUMSUB_PACK_D 2, 0, 7, 4, 1 ; m2=t8a, m0=t12a
+ UNSCRATCH 1, 12, rsp+74*mmsize
+ SUMSUB_PACK_D 1, 3, 6, 5, 4 ; m1=t9a, m3=t13a
+ mova [rsp+65*mmsize], m2
+ mova [rsp+66*mmsize], m1
+ SCRATCH 0, 8, rsp+70*mmsize
+ SCRATCH 3, 12, rsp+74*mmsize
+
+ mova m0, [%1+ 2*4*mmsize] ; in2
+ mova m1, [%1+ 5*4*mmsize] ; in5
+ mova m2, [%1+10*4*mmsize] ; in10
+ mova m3, [%1+13*4*mmsize] ; in13
+ SUMSUB_MUL_D 3, 0, 4, 5, 15893, 3981 ; m3/4=t2, m0/5=t3
+ SUMSUB_MUL_D 1, 2, 6, 7, 8423, 14053 ; m1/6=t10, m2/7=t11
+ SCRATCH 0, 10, rsp+72*mmsize
+ SUMSUB_PACK_D 1, 3, 6, 4, 0 ; m1=t2a, m3=t10a
+ UNSCRATCH 0, 10, rsp+72*mmsize
+ SUMSUB_PACK_D 2, 0, 7, 5, 4 ; m2=t3a, m0=t11a
+ mova [rsp+68*mmsize], m1
+ mova [rsp+69*mmsize], m2
+ SCRATCH 3, 13, rsp+75*mmsize
+ SCRATCH 0, 14, rsp+76*mmsize
+
+ mova m0, [%1+ 1*4*mmsize] ; in1
+ mova m1, [%1+ 6*4*mmsize] ; in6
+ mova m2, [%1+ 9*4*mmsize] ; in9
+ mova m3, [%1+14*4*mmsize] ; in14
+ SUMSUB_MUL_D 2, 1, 4, 5, 13160, 9760 ; m2/4=t6, m1/5=t7
+ SUMSUB_MUL_D 0, 3, 6, 7, 2404, 16207 ; m0/6=t14, m3/7=t15
+ SCRATCH 1, 10, rsp+72*mmsize
+ SUMSUB_PACK_D 0, 2, 6, 4, 1 ; m0=t6a, m2=t14a
+ UNSCRATCH 1, 10, rsp+72*mmsize
+ SUMSUB_PACK_D 3, 1, 7, 5, 4 ; m3=t7a, m1=t15a
+
+ UNSCRATCH 4, 13, rsp+75*mmsize ; t10a
+ UNSCRATCH 5, 14, rsp+76*mmsize ; t11a
+ SCRATCH 0, 13, rsp+75*mmsize
+ SCRATCH 3, 14, rsp+76*mmsize
+ SUMSUB_MUL_D 4, 5, 6, 7, 9102, 13623 ; m4/6=t10, m5/7=t11
+ SUMSUB_MUL_D 1, 2, 0, 3, 13623, 9102 ; m1/0=t15, m2/3=t14
+ SCRATCH 0, 10, rsp+72*mmsize
+ SUMSUB_PACK_D 2, 4, 3, 6, 0 ; m2=t10a, m4=t14a
+ UNSCRATCH 0, 10, rsp+72*mmsize
+ SUMSUB_PACK_D 1, 5, 0, 7, 6 ; m1=t11a, m5=t15a
+
+ UNSCRATCH 0, 8, rsp+70*mmsize ; t12a
+ UNSCRATCH 3, 12, rsp+74*mmsize ; t13a
+ SCRATCH 2, 8, rsp+70*mmsize
+ SCRATCH 1, 12, rsp+74*mmsize
+ SUMSUB_MUL_D 0, 3, 1, 2, 15137, 6270 ; m0/1=t12, m3/2=t13
+ SUMSUB_MUL_D 5, 4, 7, 6, 6270, 15137 ; m5/7=t15, m4/6=t14
+ SCRATCH 2, 10, rsp+72*mmsize
+ SUMSUB_PACK_D 4, 0, 6, 1, 2 ; m4=out2, m0=t14a
+ UNSCRATCH 2, 10, rsp+72*mmsize
+ SUMSUB_PACK_D 5, 3, 7, 2, 1 ; m5=-out13, m3=t15a
+ NEGD m5 ; m5=out13
+
+ UNSCRATCH 1, 9, rsp+71*mmsize ; t1a
+ mova m2, [rsp+68*mmsize] ; t2a
+ UNSCRATCH 6, 13, rsp+75*mmsize ; t6a
+ UNSCRATCH 7, 14, rsp+76*mmsize ; t7a
+ SCRATCH 4, 10, rsp+72*mmsize
+ SCRATCH 5, 13, rsp+75*mmsize
+ UNSCRATCH 4, 15, rsp+77*mmsize ; t4a
+ UNSCRATCH 5, 11, rsp+73*mmsize ; t5a
+ SCRATCH 0, 14, rsp+76*mmsize
+ SCRATCH 3, 15, rsp+77*mmsize
+ mova m0, [rsp+67*mmsize] ; t0a
+ SUMSUB_BA d, 4, 0, 3 ; m4=t0, m0=t4
+ SUMSUB_BA d, 5, 1, 3 ; m5=t1, m1=t5
+ SUMSUB_BA d, 6, 2, 3 ; m6=t2, m2=t6
+ SCRATCH 4, 9, rsp+71*mmsize
+ mova m3, [rsp+69*mmsize] ; t3a
+ SUMSUB_BA d, 7, 3, 4 ; m7=t3, m3=t7
+
+ mova [rsp+67*mmsize], m5
+ mova [rsp+68*mmsize], m6
+ mova [rsp+69*mmsize], m7
+ SUMSUB_MUL_D 0, 1, 4, 5, 15137, 6270 ; m0/4=t4a, m1/5=t5a
+ SUMSUB_MUL_D 3, 2, 7, 6, 6270, 15137 ; m3/7=t7a, m2/6=t6a
+ SCRATCH 1, 11, rsp+73*mmsize
+ SUMSUB_PACK_D 2, 0, 6, 4, 1 ; m2=-out3, m0=t6
+ NEGD m2 ; m2=out3
+ UNSCRATCH 1, 11, rsp+73*mmsize
+ SUMSUB_PACK_D 3, 1, 7, 5, 4 ; m3=out12, m1=t7
+ SCRATCH 2, 11, rsp+73*mmsize
+ UNSCRATCH 2, 12, rsp+74*mmsize ; t11a
+ SCRATCH 3, 12, rsp+74*mmsize
+
+ UNSCRATCH 3, 8, rsp+70*mmsize ; t10a
+ mova m4, [rsp+65*mmsize] ; t8a
+ mova m5, [rsp+66*mmsize] ; t9a
+ SUMSUB_BA d, 3, 4, 6 ; m3=-out1, m4=t10
+ NEGD m3 ; m3=out1
+ SUMSUB_BA d, 2, 5, 6 ; m2=out14, m5=t11
+ UNSCRATCH 6, 9, rsp+71*mmsize ; t0
+ UNSCRATCH 7, 14, rsp+76*mmsize ; t14a
+ SCRATCH 3, 9, rsp+71*mmsize
+ SCRATCH 2, 14, rsp+76*mmsize
+
+ SUMSUB_MUL 1, 0, 2, 3, 11585, 11585 ; m1=out4, m0=out11
+ mova [rsp+65*mmsize], m0
+ SUMSUB_MUL 5, 4, 2, 3, 11585, 11585 ; m5=out6, m4=out9
+ UNSCRATCH 0, 15, rsp+77*mmsize ; t15a
+ SUMSUB_MUL 7, 0, 2, 3, 11585, m11585 ; m7=out10, m0=out5
+
+ mova m2, [rsp+68*mmsize] ; t2
+ SUMSUB_BA d, 2, 6, 3 ; m2=out0, m6=t2a
+ SCRATCH 2, 8, rsp+70*mmsize
+ mova m2, [rsp+67*mmsize] ; t1
+ mova m3, [rsp+69*mmsize] ; t3
+ mova [rsp+67*mmsize], m7
+ SUMSUB_BA d, 3, 2, 7 ; m3=-out15, m2=t3a
+ NEGD m3 ; m3=out15
+ SCRATCH 3, 15, rsp+77*mmsize
+ SUMSUB_MUL 6, 2, 7, 3, 11585, m11585 ; m6=out8, m2=out7
+ mova m7, [rsp+67*mmsize]
+
+ SWAP 0, 1
+ SWAP 2, 5, 4, 6, 7, 3
+%endmacro
+
+; Emit vp9_%1_%4_16x16_add_{10,12}: row transform %2 (scratch base %3),
+; col transform %5 (scratch base %6), eob lookup table %7. Structure
+; mirrors the 16x16 idct_idct above; the 12bpp entry only swaps the clamp
+; constant and jumps into the 10bpp .body.
+%macro IADST16_FN 7
+cglobal vp9_%1_%4_16x16_add_10, 4, 6 + ARCH_X86_64, 16, \
+ 70 * mmsize + ARCH_X86_32 * 8 * mmsize, \
+ dst, stride, block, eob
+ mova m0, [pw_1023]
+
+.body:
+ mova [rsp+64*mmsize], m0
+ ; cnt (4th arg) aliases the eob argument here
+ DEFINE_ARGS dst, stride, block, cnt, ptr, skip, dstbak
+%if ARCH_X86_64
+ mov dstbakq, dstq
+ movsxd cntq, cntd
+%endif
+%ifdef PIC
+ lea ptrq, [%7_16x16]
+ movzx cntd, byte [ptrq+cntq-1]
+%else
+ movzx cntd, byte [%7_16x16+cntq-1]
+%endif
+ mov skipd, 4
+ sub skipd, cntd
+ mov ptrq, rsp
+.loop_1:
+ %2_1D blockq
+
+ TRANSPOSE4x4D 0, 1, 2, 3, 7
+ mova [ptrq+ 1*mmsize], m0
+ mova [ptrq+ 5*mmsize], m1
+ mova [ptrq+ 9*mmsize], m2
+ mova [ptrq+13*mmsize], m3
+ mova m7, [rsp+65*mmsize]
+ TRANSPOSE4x4D 4, 5, 6, 7, 0
+ mova [ptrq+ 2*mmsize], m4
+ mova [ptrq+ 6*mmsize], m5
+ mova [ptrq+10*mmsize], m6
+ mova [ptrq+14*mmsize], m7
+ UNSCRATCH 0, 8, rsp+(%3+0)*mmsize
+ UNSCRATCH 1, 9, rsp+(%3+1)*mmsize
+ UNSCRATCH 2, 10, rsp+(%3+2)*mmsize
+ UNSCRATCH 3, 11, rsp+(%3+3)*mmsize
+ TRANSPOSE4x4D 0, 1, 2, 3, 7
+ mova [ptrq+ 0*mmsize], m0
+ mova [ptrq+ 4*mmsize], m1
+ mova [ptrq+ 8*mmsize], m2
+ mova [ptrq+12*mmsize], m3
+ UNSCRATCH 4, 12, rsp+(%3+4)*mmsize
+ UNSCRATCH 5, 13, rsp+(%3+5)*mmsize
+ UNSCRATCH 6, 14, rsp+(%3+6)*mmsize
+ UNSCRATCH 7, 15, rsp+(%3+7)*mmsize
+ TRANSPOSE4x4D 4, 5, 6, 7, 0
+ mova [ptrq+ 3*mmsize], m4
+ mova [ptrq+ 7*mmsize], m5
+ mova [ptrq+11*mmsize], m6
+ mova [ptrq+15*mmsize], m7
+ add ptrq, 16 * mmsize
+ add blockq, mmsize
+ dec cntd
+ jg .loop_1
+
+ ; zero-pad the remainder (skipped cols)
+ test skipd, skipd
+ jz .end
+ add skipd, skipd
+ lea blockq, [blockq+skipq*(mmsize/2)]
+ pxor m0, m0
+.loop_z:
+ mova [ptrq+mmsize*0], m0
+ mova [ptrq+mmsize*1], m0
+ mova [ptrq+mmsize*2], m0
+ mova [ptrq+mmsize*3], m0
+ mova [ptrq+mmsize*4], m0
+ mova [ptrq+mmsize*5], m0
+ mova [ptrq+mmsize*6], m0
+ mova [ptrq+mmsize*7], m0
+ add ptrq, 8 * mmsize
+ dec skipd
+ jg .loop_z
+.end:
+
+ DEFINE_ARGS dst, stride, block, cnt, ptr, stride3, dstbak
+ lea stride3q, [strideq*3]
+ mov cntd, 4
+ mov ptrq, rsp
+.loop_2:
+ %5_1D ptrq
+
+ pxor m7, m7
+ lea dstq, [dstq+strideq*4]
+ ROUND_AND_STORE_4x4 0, 1, 2, 3, m7, [rsp+64*mmsize], [pd_32], 6
+ lea dstq, [dstq+strideq*4]
+ mova m0, [rsp+65*mmsize]
+ mova m1, [rsp+64*mmsize]
+ mova m2, [pd_32]
+ ROUND_AND_STORE_4x4 4, 5, 6, 0, m7, m1, m2, 6
+
+%if ARCH_X86_64
+ DEFINE_ARGS dstbak, stride, block, cnt, ptr, stride3, dst
+%else
+ mov dstq, dstm
+%endif
+ UNSCRATCH 0, 8, rsp+(%6+0)*mmsize
+ UNSCRATCH 4, 9, rsp+(%6+1)*mmsize
+ UNSCRATCH 5, 10, rsp+(%6+2)*mmsize
+ UNSCRATCH 3, 11, rsp+(%6+3)*mmsize
+ ROUND_AND_STORE_4x4 0, 4, 5, 3, m7, m1, m2, 6
+%if ARCH_X86_64
+ DEFINE_ARGS dst, stride, block, cnt, ptr, stride3, dstbak
+ lea dstq, [dstbakq+stride3q*4]
+%else
+ lea dstq, [dstq+stride3q*4]
+%endif
+ UNSCRATCH 4, 12, rsp+(%6+4)*mmsize
+ UNSCRATCH 5, 13, rsp+(%6+5)*mmsize
+ UNSCRATCH 6, 14, rsp+(%6+6)*mmsize
+ UNSCRATCH 0, 15, rsp+(%6+7)*mmsize
+ ROUND_AND_STORE_4x4 4, 5, 6, 0, m7, m1, m2, 6
+
+ add ptrq, mmsize
+%if ARCH_X86_64
+ add dstbakq, 8
+ mov dstq, dstbakq
+%else
+ add dword dstm, 8
+ mov dstq, dstm
+%endif
+ dec cntd
+ jg .loop_2
+
+ ; m7 is still zero
+ ZERO_BLOCK blockq-4*mmsize, 64, 16, m7
+ RET
+
+cglobal vp9_%1_%4_16x16_add_12, 4, 6 + ARCH_X86_64, 16, \
+ 70 * mmsize + ARCH_X86_32 * 8 * mmsize, \
+ dst, stride, block, eob
+ mova m0, [pw_4095]
+ jmp mangle(private_prefix %+ _ %+ vp9_%1_%4_16x16_add_10 %+ SUFFIX).body
+%endmacro
+
+; Instantiate the SSE2 16x16 transform pairs. Each IADST16_FN invocation
+; emits one row-transform/col-transform combination; the numeric arguments
+; are the rsp scratch offsets (in mmsize units) used by each 1D pass.
+INIT_XMM sse2
+IADST16_FN idct, IDCT16, 67, iadst, IADST16, 70, row
+IADST16_FN iadst, IADST16, 70, idct, IDCT16, 67, col
+IADST16_FN iadst, IADST16, 70, iadst, IADST16, 70, default
+
+; One 1D pass of the 32x32 inverse DCT (high bit depth).
+;   %1 = pass (1 = first/transpose pass writing to ptrq, 2 = second pass
+;        rounding and storing to dstq), %2 = source pointer, %3 = source
+;        stride in bytes (defaults to 8*mmsize, i.e. one 32-coef row).
+; Even coefficients are handled by delegating to IDCT16_1D; the odd half
+; (stages 1-7 of t16..t31) is computed here. rsp slots 256..282 (in mmsize
+; units) are used as spill space; on x86-32 the SCRATCH/UNSCRATCH pairs
+; emulate registers m8-m15 via slots 275-282.
+%macro IDCT32_1D 2-3 8 * mmsize; pass[1/2], src, src_stride
+ IDCT16_1D %2, 2 * %3, 272, 257
+%if ARCH_X86_64
+ mova [rsp+257*mmsize], m8
+ mova [rsp+258*mmsize], m9
+ mova [rsp+259*mmsize], m10
+ mova [rsp+260*mmsize], m11
+ mova [rsp+261*mmsize], m12
+ mova [rsp+262*mmsize], m13
+ mova [rsp+263*mmsize], m14
+ mova [rsp+264*mmsize], m15
+%endif
+ mova [rsp+265*mmsize], m0
+ mova [rsp+266*mmsize], m1
+ mova [rsp+267*mmsize], m2
+ mova [rsp+268*mmsize], m3
+ mova [rsp+269*mmsize], m4
+ mova [rsp+270*mmsize], m5
+ mova [rsp+271*mmsize], m6
+
+ ; r257-260: t0-3
+ ; r265-272: t4/5a/6a/7/8/9a/10/11a
+ ; r261-264: t12a/13/14a/15
+ ; r273-274 is free as scratch space, and 275-282 mirrors m8-15 on 32bit
+
+ mova m0, [%2+ 1*%3] ; in1
+ mova m1, [%2+15*%3] ; in15
+ mova m2, [%2+17*%3] ; in17
+ mova m3, [%2+31*%3] ; in31
+ SUMSUB_MUL 0, 3, 4, 5, 16364, 804 ; m0=t31a, m3=t16a
+ SUMSUB_MUL 2, 1, 4, 5, 11003, 12140 ; m2=t30a, m1=t17a
+ SUMSUB_BA d, 1, 3, 4 ; m1=t16, m3=t17
+ SUMSUB_BA d, 2, 0, 4 ; m2=t31, m0=t30
+ SUMSUB_MUL 0, 3, 4, 5, 16069, 3196 ; m0=t30a, m3=t17a
+ SCRATCH 0, 8, rsp+275*mmsize
+ SCRATCH 2, 9, rsp+276*mmsize
+
+ ; end of stage 1-3 first quart
+
+ mova m0, [%2+ 7*%3] ; in7
+ mova m2, [%2+ 9*%3] ; in9
+ mova m4, [%2+23*%3] ; in23
+ mova m5, [%2+25*%3] ; in25
+ SUMSUB_MUL 2, 4, 6, 7, 14811, 7005 ; m2=t29a, m4=t18a
+ SUMSUB_MUL 5, 0, 6, 7, 5520, 15426 ; m5=t28a, m0=t19a
+ SUMSUB_BA d, 4, 0, 6 ; m4=t19, m0=t18
+ SUMSUB_BA d, 2, 5, 6 ; m2=t28, m5=t29
+ SUMSUB_MUL 5, 0, 6, 7, 3196, m16069 ; m5=t29a, m0=t18a
+
+ ; end of stage 1-3 second quart
+
+ SUMSUB_BA d, 4, 1, 6 ; m4=t16a, m1=t19a
+ SUMSUB_BA d, 0, 3, 6 ; m0=t17, m3=t18
+ UNSCRATCH 6, 8, rsp+275*mmsize ; t30a
+ UNSCRATCH 7, 9, rsp+276*mmsize ; t31
+ mova [rsp+273*mmsize], m4
+ mova [rsp+274*mmsize], m0
+ SUMSUB_BA d, 2, 7, 0 ; m2=t31a, m7=t28a
+ SUMSUB_BA d, 5, 6, 0 ; m5=t30, m6=t29
+ SUMSUB_MUL 6, 3, 0, 4, 15137, 6270 ; m6=t29a, m3=t18a
+ SUMSUB_MUL 7, 1, 0, 4, 15137, 6270 ; m7=t28, m1=t19
+ SCRATCH 3, 10, rsp+277*mmsize
+ SCRATCH 1, 11, rsp+278*mmsize
+ SCRATCH 7, 12, rsp+279*mmsize
+ SCRATCH 6, 13, rsp+280*mmsize
+ SCRATCH 5, 14, rsp+281*mmsize
+ SCRATCH 2, 15, rsp+282*mmsize
+
+ ; end of stage 4-5 first half
+
+ mova m0, [%2+ 5*%3] ; in5
+ mova m1, [%2+11*%3] ; in11
+ mova m2, [%2+21*%3] ; in21
+ mova m3, [%2+27*%3] ; in27
+ SUMSUB_MUL 0, 3, 4, 5, 15893, 3981 ; m0=t27a, m3=t20a
+ SUMSUB_MUL 2, 1, 4, 5, 8423, 14053 ; m2=t26a, m1=t21a
+ SUMSUB_BA d, 1, 3, 4 ; m1=t20, m3=t21
+ SUMSUB_BA d, 2, 0, 4 ; m2=t27, m0=t26
+ SUMSUB_MUL 0, 3, 4, 5, 9102, 13623 ; m0=t26a, m3=t21a
+ SCRATCH 0, 8, rsp+275*mmsize
+ SCRATCH 2, 9, rsp+276*mmsize
+
+ ; end of stage 1-3 third quart
+
+ mova m0, [%2+ 3*%3] ; in3
+ mova m2, [%2+13*%3] ; in13
+ mova m4, [%2+19*%3] ; in19
+ mova m5, [%2+29*%3] ; in29
+ SUMSUB_MUL 2, 4, 6, 7, 13160, 9760 ; m2=t25a, m4=t22a
+ SUMSUB_MUL 5, 0, 6, 7, 2404, 16207 ; m5=t24a, m0=t23a
+ SUMSUB_BA d, 4, 0, 6 ; m4=t23, m0=t22
+ SUMSUB_BA d, 2, 5, 6 ; m2=t24, m5=t25
+ SUMSUB_MUL 5, 0, 6, 7, 13623, m9102 ; m5=t25a, m0=t22a
+
+ ; end of stage 1-3 fourth quart
+
+ SUMSUB_BA d, 1, 4, 6 ; m1=t23a, m4=t20a
+ SUMSUB_BA d, 3, 0, 6 ; m3=t22, m0=t21
+ UNSCRATCH 6, 8, rsp+275*mmsize ; t26a
+ UNSCRATCH 7, 9, rsp+276*mmsize ; t27
+ SCRATCH 3, 8, rsp+275*mmsize
+ SCRATCH 1, 9, rsp+276*mmsize
+ SUMSUB_BA d, 7, 2, 1 ; m7=t24a, m2=t27a
+ SUMSUB_BA d, 6, 5, 1 ; m6=t25, m5=t26
+ SUMSUB_MUL 2, 4, 1, 3, 6270, m15137 ; m2=t27, m4=t20
+ SUMSUB_MUL 5, 0, 1, 3, 6270, m15137 ; m5=t26a, m0=t21a
+
+ ; end of stage 4-5 second half
+
+ UNSCRATCH 1, 12, rsp+279*mmsize ; t28
+ UNSCRATCH 3, 13, rsp+280*mmsize ; t29a
+ SCRATCH 4, 12, rsp+279*mmsize
+ SCRATCH 0, 13, rsp+280*mmsize
+ SUMSUB_BA d, 5, 3, 0 ; m5=t29, m3=t26
+ SUMSUB_BA d, 2, 1, 0 ; m2=t28a, m1=t27a
+ UNSCRATCH 0, 14, rsp+281*mmsize ; t30
+ UNSCRATCH 4, 15, rsp+282*mmsize ; t31a
+ SCRATCH 2, 14, rsp+281*mmsize
+ SCRATCH 5, 15, rsp+282*mmsize
+ SUMSUB_BA d, 6, 0, 2 ; m6=t30a, m0=t25a
+ SUMSUB_BA d, 7, 4, 2 ; m7=t31, m4=t24
+
+ mova m2, [rsp+273*mmsize] ; t16a
+ mova m5, [rsp+274*mmsize] ; t17
+ mova [rsp+273*mmsize], m6
+ mova [rsp+274*mmsize], m7
+ UNSCRATCH 6, 10, rsp+277*mmsize ; t18a
+ UNSCRATCH 7, 11, rsp+278*mmsize ; t19
+ SCRATCH 4, 10, rsp+277*mmsize
+ SCRATCH 0, 11, rsp+278*mmsize
+ UNSCRATCH 4, 12, rsp+279*mmsize ; t20
+ UNSCRATCH 0, 13, rsp+280*mmsize ; t21a
+ SCRATCH 3, 12, rsp+279*mmsize
+ SCRATCH 1, 13, rsp+280*mmsize
+ SUMSUB_BA d, 0, 6, 1 ; m0=t18, m6=t21
+ SUMSUB_BA d, 4, 7, 1 ; m4=t19a, m7=t20a
+ UNSCRATCH 3, 8, rsp+275*mmsize ; t22
+ UNSCRATCH 1, 9, rsp+276*mmsize ; t23a
+ SCRATCH 0, 8, rsp+275*mmsize
+ SCRATCH 4, 9, rsp+276*mmsize
+ SUMSUB_BA d, 3, 5, 0 ; m3=t17a, m5=t22a
+ SUMSUB_BA d, 1, 2, 0 ; m1=t16, m2=t23
+
+ ; end of stage 6
+
+ UNSCRATCH 0, 10, rsp+277*mmsize ; t24
+ UNSCRATCH 4, 11, rsp+278*mmsize ; t25a
+ SCRATCH 1, 10, rsp+277*mmsize
+ SCRATCH 3, 11, rsp+278*mmsize
+ SUMSUB_MUL 0, 2, 1, 3, 11585, 11585 ; m0=t24a, m2=t23a
+ SUMSUB_MUL 4, 5, 1, 3, 11585, 11585 ; m4=t25, m5=t22
+ UNSCRATCH 1, 12, rsp+279*mmsize ; t26
+ UNSCRATCH 3, 13, rsp+280*mmsize ; t27a
+ SCRATCH 0, 12, rsp+279*mmsize
+ SCRATCH 4, 13, rsp+280*mmsize
+ SUMSUB_MUL 3, 7, 0, 4, 11585, 11585 ; m3=t27, m7=t20
+ SUMSUB_MUL 1, 6, 0, 4, 11585, 11585 ; m1=t26a, m6=t21a
+
+ ; end of stage 7
+
+ mova m0, [rsp+269*mmsize] ; t8
+ mova m4, [rsp+270*mmsize] ; t9a
+ mova [rsp+269*mmsize], m1 ; t26a
+ mova [rsp+270*mmsize], m3 ; t27
+ mova m3, [rsp+271*mmsize] ; t10
+ SUMSUB_BA d, 2, 0, 1 ; m2=out8, m0=out23
+ SUMSUB_BA d, 5, 4, 1 ; m5=out9, m4=out22
+ SUMSUB_BA d, 6, 3, 1 ; m6=out10, m3=out21
+ mova m1, [rsp+272*mmsize] ; t11a
+ mova [rsp+271*mmsize], m0
+ SUMSUB_BA d, 7, 1, 0 ; m7=out11, m1=out20
+
+; pass 1 transposes 4x4 sub-blocks into the intermediate buffer (ptrq);
+; pass 2 rounds (via the bd-max in rsp+256 and pd_32) and stores to dstq.
+%if %1 == 1
+ TRANSPOSE4x4D 2, 5, 6, 7, 0
+ mova [ptrq+ 2*mmsize], m2
+ mova [ptrq+10*mmsize], m5
+ mova [ptrq+18*mmsize], m6
+ mova [ptrq+26*mmsize], m7
+%else ; %1 == 2
+ pxor m0, m0
+ lea dstq, [dstq+strideq*8]
+ ROUND_AND_STORE_4x4 2, 5, 6, 7, m0, [rsp+256*mmsize], [pd_32], 6
+%endif
+ mova m2, [rsp+271*mmsize]
+%if %1 == 1
+ TRANSPOSE4x4D 1, 3, 4, 2, 0
+ mova [ptrq+ 5*mmsize], m1
+ mova [ptrq+13*mmsize], m3
+ mova [ptrq+21*mmsize], m4
+ mova [ptrq+29*mmsize], m2
+%else ; %1 == 2
+ lea dstq, [dstq+stride3q*4]
+ ROUND_AND_STORE_4x4 1, 3, 4, 2, m0, [rsp+256*mmsize], [pd_32], 6
+%endif
+
+ ; end of last stage + store for out8-11 and out20-23
+
+ UNSCRATCH 0, 9, rsp+276*mmsize ; t19a
+ UNSCRATCH 1, 8, rsp+275*mmsize ; t18
+ UNSCRATCH 2, 11, rsp+278*mmsize ; t17a
+ UNSCRATCH 3, 10, rsp+277*mmsize ; t16
+ mova m7, [rsp+261*mmsize] ; t12a
+ mova m6, [rsp+262*mmsize] ; t13
+ mova m5, [rsp+263*mmsize] ; t14a
+ SUMSUB_BA d, 0, 7, 4 ; m0=out12, m7=out19
+ SUMSUB_BA d, 1, 6, 4 ; m1=out13, m6=out18
+ SUMSUB_BA d, 2, 5, 4 ; m2=out14, m5=out17
+ mova m4, [rsp+264*mmsize] ; t15
+ SCRATCH 7, 8, rsp+275*mmsize
+ SUMSUB_BA d, 3, 4, 7 ; m3=out15, m4=out16
+
+%if %1 == 1
+ TRANSPOSE4x4D 0, 1, 2, 3, 7
+ mova [ptrq+ 3*mmsize], m0
+ mova [ptrq+11*mmsize], m1
+ mova [ptrq+19*mmsize], m2
+ mova [ptrq+27*mmsize], m3
+%else ; %1 == 2
+%if ARCH_X86_64
+ SWAP 7, 9
+ lea dstq, [dstbakq+stride3q*4]
+%else ; x86-32
+ pxor m7, m7
+ mov dstq, dstm
+ lea dstq, [dstq+stride3q*4]
+%endif
+ ROUND_AND_STORE_4x4 0, 1, 2, 3, m7, [rsp+256*mmsize], [pd_32], 6
+%endif
+ UNSCRATCH 0, 8, rsp+275*mmsize ; out19
+%if %1 == 1
+ TRANSPOSE4x4D 4, 5, 6, 0, 7
+ mova [ptrq+ 4*mmsize], m4
+ mova [ptrq+12*mmsize], m5
+ mova [ptrq+20*mmsize], m6
+ mova [ptrq+28*mmsize], m0
+%else ; %1 == 2
+ lea dstq, [dstq+strideq*4]
+ ROUND_AND_STORE_4x4 4, 5, 6, 0, m7, [rsp+256*mmsize], [pd_32], 6
+%endif
+
+ ; end of last stage + store for out12-19
+
+%if ARCH_X86_64
+ SWAP 7, 8
+%endif
+ mova m7, [rsp+257*mmsize] ; t0
+ mova m6, [rsp+258*mmsize] ; t1
+ mova m5, [rsp+259*mmsize] ; t2
+ mova m4, [rsp+260*mmsize] ; t3
+ mova m0, [rsp+274*mmsize] ; t31
+ mova m1, [rsp+273*mmsize] ; t30a
+ UNSCRATCH 2, 15, rsp+282*mmsize ; t29
+ SUMSUB_BA d, 0, 7, 3 ; m0=out0, m7=out31
+ SUMSUB_BA d, 1, 6, 3 ; m1=out1, m6=out30
+ SUMSUB_BA d, 2, 5, 3 ; m2=out2, m5=out29
+ SCRATCH 0, 9, rsp+276*mmsize
+ UNSCRATCH 3, 14, rsp+281*mmsize ; t28a
+ SUMSUB_BA d, 3, 4, 0 ; m3=out3, m4=out28
+
+%if %1 == 1
+ TRANSPOSE4x4D 4, 5, 6, 7, 0
+ mova [ptrq+ 7*mmsize], m4
+ mova [ptrq+15*mmsize], m5
+ mova [ptrq+23*mmsize], m6
+ mova [ptrq+31*mmsize], m7
+%else ; %1 == 2
+%if ARCH_X86_64
+ SWAP 0, 8
+%else ; x86-32
+ pxor m0, m0
+%endif
+ lea dstq, [dstq+stride3q*4]
+ ROUND_AND_STORE_4x4 4, 5, 6, 7, m0, [rsp+256*mmsize], [pd_32], 6
+%endif
+ UNSCRATCH 7, 9, rsp+276*mmsize ; out0
+%if %1 == 1
+ TRANSPOSE4x4D 7, 1, 2, 3, 0
+ mova [ptrq+ 0*mmsize], m7
+ mova [ptrq+ 8*mmsize], m1
+ mova [ptrq+16*mmsize], m2
+ mova [ptrq+24*mmsize], m3
+%else ; %1 == 2
+%if ARCH_X86_64
+ DEFINE_ARGS dstbak, stride, block, cnt, ptr, stride3, dst
+%else ; x86-32
+ mov dstq, dstm
+%endif
+ ROUND_AND_STORE_4x4 7, 1, 2, 3, m0, [rsp+256*mmsize], [pd_32], 6
+%if ARCH_X86_64
+ DEFINE_ARGS dst, stride, block, cnt, ptr, stride3, dstbak
+%endif
+%endif
+
+ ; end of last stage + store for out0-3 and out28-31
+
+%if ARCH_X86_64
+ SWAP 0, 8
+%endif
+ mova m7, [rsp+265*mmsize] ; t4
+ mova m6, [rsp+266*mmsize] ; t5a
+ mova m5, [rsp+267*mmsize] ; t6a
+ mova m4, [rsp+268*mmsize] ; t7
+ mova m0, [rsp+270*mmsize] ; t27
+ mova m1, [rsp+269*mmsize] ; t26a
+ UNSCRATCH 2, 13, rsp+280*mmsize ; t25
+ SUMSUB_BA d, 0, 7, 3 ; m0=out4, m7=out27
+ SUMSUB_BA d, 1, 6, 3 ; m1=out5, m6=out26
+ SUMSUB_BA d, 2, 5, 3 ; m2=out6, m5=out25
+ UNSCRATCH 3, 12, rsp+279*mmsize ; t24a
+ SCRATCH 7, 9, rsp+276*mmsize
+ SUMSUB_BA d, 3, 4, 7 ; m3=out7, m4=out24
+
+%if %1 == 1
+ TRANSPOSE4x4D 0, 1, 2, 3, 7
+ mova [ptrq+ 1*mmsize], m0
+ mova [ptrq+ 9*mmsize], m1
+ mova [ptrq+17*mmsize], m2
+ mova [ptrq+25*mmsize], m3
+%else ; %1 == 2
+%if ARCH_X86_64
+ SWAP 7, 8
+ lea dstq, [dstbakq+strideq*4]
+%else ; x86-32
+ pxor m7, m7
+ lea dstq, [dstq+strideq*4]
+%endif
+ ROUND_AND_STORE_4x4 0, 1, 2, 3, m7, [rsp+256*mmsize], [pd_32], 6
+%endif
+ UNSCRATCH 0, 9, rsp+276*mmsize ; out27
+%if %1 == 1
+ TRANSPOSE4x4D 4, 5, 6, 0, 7
+ mova [ptrq+ 6*mmsize], m4
+ mova [ptrq+14*mmsize], m5
+ mova [ptrq+22*mmsize], m6
+ mova [ptrq+30*mmsize], m0
+%else ; %1 == 2
+%if ARCH_X86_64
+ lea dstq, [dstbakq+stride3q*8]
+%else
+ mov dstq, dstm
+ lea dstq, [dstq+stride3q*8]
+%endif
+ ROUND_AND_STORE_4x4 4, 5, 6, 0, m7, [rsp+256*mmsize], [pd_32], 6
+%endif
+
+ ; end of last stage + store for out4-7 and out24-27
+%endmacro
+
+; 32x32 idct/idct add, 10-bit. Fast path handles eob==1 (DC-only) inline;
+; otherwise the full 2-pass transform runs: pass 1 per column group into the
+; rsp intermediate buffer, pass 2 per row group adding into dst. The pixel
+; clamp value (pw_1023) is parked in rsp+256*mmsize so the 12-bit entry
+; point below can reuse .idctfull with pw_4095 instead.
+INIT_XMM sse2
+cglobal vp9_idct_idct_32x32_add_10, 4, 6 + ARCH_X86_64, 16, \
+ 275 * mmsize + ARCH_X86_32 * 8 * mmsize, \
+ dst, stride, block, eob
+ mova m0, [pw_1023]
+ cmp eobd, 1
+ jg .idctfull
+
+ ; dc-only - the 10bit version can be done entirely in 32bit, since the max
+ ; coef values are 17+sign bit, and the coef is 14bit, so 31+sign easily
+ ; fits in 32bit
+ DEFINE_ARGS dst, stride, block, coef
+ pxor m2, m2
+ DC_ONLY 6, m2
+ movd m1, coefd
+ pshuflw m1, m1, q0000
+ punpcklqdq m1, m1
+ DEFINE_ARGS dst, stride, cnt
+ mov cntd, 32
+.loop_dc:
+ STORE_2x8 3, 4, 1, m2, m0, dstq, mmsize
+ STORE_2x8 3, 4, 1, m2, m0, dstq+mmsize*2, mmsize
+ add dstq, strideq
+ dec cntd
+ jg .loop_dc
+ RET
+
+.idctfull:
+ mova [rsp+256*mmsize], m0
+ DEFINE_ARGS dst, stride, block, cnt, ptr, skip, dstbak
+%if ARCH_X86_64
+ mov dstbakq, dstq
+ movsxd cntq, cntd
+%endif
+%ifdef PIC
+ lea ptrq, [default_32x32]
+ movzx cntd, byte [ptrq+cntq-1]
+%else
+ movzx cntd, byte [default_32x32+cntq-1]
+%endif
+ ; cntd now holds the number of 4-column groups with nonzero coefs
+ ; (looked up from eob via the default_32x32 table); the rest are skipped.
+ mov skipd, 8
+ sub skipd, cntd
+ mov ptrq, rsp
+.loop_1:
+ IDCT32_1D 1, blockq
+
+ add ptrq, 32 * mmsize
+ add blockq, mmsize
+ dec cntd
+ jg .loop_1
+
+ ; zero-pad the remainder (skipped cols)
+ test skipd, skipd
+ jz .end
+ shl skipd, 2
+ lea blockq, [blockq+skipq*(mmsize/4)]
+ pxor m0, m0
+.loop_z:
+ mova [ptrq+mmsize*0], m0
+ mova [ptrq+mmsize*1], m0
+ mova [ptrq+mmsize*2], m0
+ mova [ptrq+mmsize*3], m0
+ mova [ptrq+mmsize*4], m0
+ mova [ptrq+mmsize*5], m0
+ mova [ptrq+mmsize*6], m0
+ mova [ptrq+mmsize*7], m0
+ add ptrq, 8 * mmsize
+ dec skipd
+ jg .loop_z
+.end:
+
+ DEFINE_ARGS dst, stride, block, cnt, ptr, stride3, dstbak
+ lea stride3q, [strideq*3]
+ mov cntd, 8
+ mov ptrq, rsp
+.loop_2:
+ IDCT32_1D 2, ptrq
+
+ add ptrq, mmsize
+%if ARCH_X86_64
+ add dstbakq, 8
+ mov dstq, dstbakq
+%else
+ add dword dstm, 8
+ mov dstq, dstm
+%endif
+ dec cntd
+ jg .loop_2
+
+ ; m7 is still zero
+ ZERO_BLOCK blockq-8*mmsize, 128, 32, m7
+ RET
+
+; 32x32 idct/idct add, 12-bit. Shares the full-idct body with the 10-bit
+; entry point (jumps to its .idctfull with the wider pw_4095 clamp); only
+; the DC-only path differs, since 12-bit DC can overflow 32-bit math.
+INIT_XMM sse2
+cglobal vp9_idct_idct_32x32_add_12, 4, 6 + ARCH_X86_64, 16, \
+ 275 * mmsize + ARCH_X86_32 * 8 * mmsize, \
+ dst, stride, block, eob
+ mova m0, [pw_4095]
+ cmp eobd, 1
+ jg mangle(private_prefix %+ _ %+ vp9_idct_idct_32x32_add_10 %+ SUFFIX).idctfull
+
+ ; dc-only - unfortunately, this one can overflow, since coefs are 19+sign
+ ; bpp, and 19+14+sign does not fit in 32bit, so we do 2-stage multiplies
+ DEFINE_ARGS dst, stride, block, coef, coefl
+ pxor m2, m2
+ DC_ONLY_64BIT 6, m2
+ movd m1, coefd
+ pshuflw m1, m1, q0000
+ punpcklqdq m1, m1
+ DEFINE_ARGS dst, stride, cnt
+ mov cntd, 32
+.loop_dc:
+ STORE_2x8 3, 4, 1, m2, m0, dstq, mmsize
+ STORE_2x8 3, 4, 1, m2, m0, dstq+mmsize*2, mmsize
+ add dstq, strideq
+ dec cntd
+ jg .loop_dc
+ RET
diff --git a/media/ffvpx/libavcodec/x86/vp9itxfm_template.asm b/media/ffvpx/libavcodec/x86/vp9itxfm_template.asm
new file mode 100644
index 000000000..d2f2257d8
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp9itxfm_template.asm
@@ -0,0 +1,142 @@
+;******************************************************************************
+;* VP9 IDCT SIMD optimizations
+;*
+;* Copyright (C) 2013 Clément Bœsch <u pkh me>
+;* Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+; One 1D pass of the 4x4 inverse Walsh-Hadamard transform (lossless mode).
+; Operates in-place on m0-m3 (m4/m5 as temporaries); additions/subtractions
+; only, no multiplies. The SWAPs reorder registers to match the butterfly's
+; expected input/output lane order.
+%macro VP9_IWHT4_1D 0
+ SWAP 1, 2, 3
+ paddw m0, m2
+ psubw m3, m1
+ psubw m4, m0, m3
+ psraw m4, 1
+ psubw m5, m4, m1
+ SWAP 5, 1
+ psubw m4, m2
+ SWAP 4, 2
+ psubw m0, m1
+ paddw m3, m2
+ SWAP 3, 2, 1
+%endmacro
+
+; (a*x + b*y + round) >> shift
+; Two parallel 32-bit multiply-accumulate+round+shift steps on packed words.
+%macro VP9_MULSUB_2W_2X 5 ; dst1, dst2/src, round, coefs1, coefs2
+ pmaddwd m%1, m%2, %4
+ pmaddwd m%2, %5
+ paddd m%1, %3
+ paddd m%2, %3
+ psrad m%1, 14
+ psrad m%2, 14
+%endmacro
+
+; Apply VP9_MULSUB_2W_2X to both the high and low word-interleaved halves,
+; then re-pack the 32-bit intermediates down to words in dst1/dst2.
+%macro VP9_MULSUB_2W_4X 7 ; dst1, dst2, coef1, coef2, rnd, tmp1/src, tmp2
+ VP9_MULSUB_2W_2X %7, %6, %5, [pw_m%3_%4], [pw_%4_%3]
+ VP9_MULSUB_2W_2X %1, %2, %5, [pw_m%3_%4], [pw_%4_%3]
+ packssdw m%1, m%7
+ packssdw m%2, m%6
+%endmacro
+
+; Word-interleave the two sources, then run the 4X multiply/sub butterfly.
+; The 7-arg form operates on dst1/dst2 in place; the 9-arg form reads from
+; explicit src1/src2 registers instead.
+%macro VP9_UNPACK_MULSUB_2W_4X 7-9 ; dst1, dst2, (src1, src2,) coef1, coef2, rnd, tmp1, tmp2
+%if %0 == 7
+ punpckhwd m%6, m%2, m%1
+ punpcklwd m%2, m%1
+ VP9_MULSUB_2W_4X %1, %2, %3, %4, %5, %6, %7
+%else
+ punpckhwd m%8, m%4, m%3
+ punpcklwd m%2, m%4, m%3
+ VP9_MULSUB_2W_4X %1, %2, %5, %6, %7, %8, %9
+%endif
+%endmacro
+
+; Final butterfly of the 4x4 idct: combine t0..t3 into out0..out3.
+%macro VP9_IDCT4_1D_FINALIZE 0
+ SUMSUB_BA w, 3, 2, 4 ; m3=t3+t0, m2=-t3+t0
+ SUMSUB_BA w, 1, 0, 4 ; m1=t2+t1, m0=-t2+t1
+ SWAP 0, 3, 2 ; 3102 -> 0123
+%endmacro
+
+; One 1D pass of the 4x4 idct. ssse3 uses pmulhrsw with a pre-scaled
+; constant in m6; the sse2 path does the full multiply (m7 = round const).
+%macro VP9_IDCT4_1D 0
+%if cpuflag(ssse3)
+ SUMSUB_BA w, 2, 0, 4 ; m2=IN(0)+IN(2) m0=IN(0)-IN(2)
+ pmulhrsw m2, m6 ; m2=t0
+ pmulhrsw m0, m6 ; m0=t1
+%else ; <= sse2
+ VP9_UNPACK_MULSUB_2W_4X 0, 2, 11585, 11585, m7, 4, 5 ; m0=t1, m2=t0
+%endif
+ VP9_UNPACK_MULSUB_2W_4X 1, 3, 15137, 6270, m7, 4, 5 ; m1=t2, m3=t3
+ VP9_IDCT4_1D_FINALIZE
+%endmacro
+
+; One 1D pass of the 4x4 iadst. Inputs arrive in mmx registers m0-m3 and
+; are widened into xmm registers for the 32-bit multiply-accumulate chain;
+; results are packed back down and moved to m0-m3 via movdq2q. On ssse3,
+; out2 is computed cheaply in the mmx domain with pmulhrsw; pre-ssse3 it
+; goes through the full xmm pmaddwd path (xmm6/xmm7).
+; NOTE(review): xmm5 is used as the rounding constant and xmm6 appears live
+; across the macro on non-ssse3 — presumably set up by the caller; confirm
+; against the invoking transform macro.
+%macro VP9_IADST4_1D 0
+ movq2dq xmm0, m0
+ movq2dq xmm1, m1
+ movq2dq xmm2, m2
+ movq2dq xmm3, m3
+%if cpuflag(ssse3)
+ paddw m3, m0
+%endif
+ punpcklwd xmm0, xmm1
+ punpcklwd xmm2, xmm3
+ pmaddwd xmm1, xmm0, [pw_5283_13377]
+ pmaddwd xmm4, xmm0, [pw_9929_13377]
+%if notcpuflag(ssse3)
+ pmaddwd xmm6, xmm0, [pw_13377_0]
+%endif
+ pmaddwd xmm0, [pw_15212_m13377]
+ pmaddwd xmm3, xmm2, [pw_15212_9929]
+%if notcpuflag(ssse3)
+ pmaddwd xmm7, xmm2, [pw_m13377_13377]
+%endif
+ pmaddwd xmm2, [pw_m5283_m15212]
+%if cpuflag(ssse3)
+ psubw m3, m2
+%else
+ paddd xmm6, xmm7
+%endif
+ paddd xmm0, xmm2
+ paddd xmm3, xmm5
+ paddd xmm2, xmm5
+%if notcpuflag(ssse3)
+ paddd xmm6, xmm5
+%endif
+ paddd xmm1, xmm3
+ paddd xmm0, xmm3
+ paddd xmm4, xmm2
+ psrad xmm1, 14
+ psrad xmm0, 14
+ psrad xmm4, 14
+%if cpuflag(ssse3)
+ pmulhrsw m3, [pw_13377x2] ; out2
+%else
+ psrad xmm6, 14
+%endif
+ packssdw xmm0, xmm0
+ packssdw xmm1, xmm1
+ packssdw xmm4, xmm4
+%if notcpuflag(ssse3)
+ packssdw xmm6, xmm6
+%endif
+ movdq2q m0, xmm0 ; out3
+ movdq2q m1, xmm1 ; out0
+ movdq2q m2, xmm4 ; out1
+%if notcpuflag(ssse3)
+ movdq2q m3, xmm6 ; out2
+%endif
+ SWAP 0, 1, 2, 3
+%endmacro
diff --git a/media/ffvpx/libavcodec/x86/vp9lpf.asm b/media/ffvpx/libavcodec/x86/vp9lpf.asm
new file mode 100644
index 000000000..2c4fe214d
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp9lpf.asm
@@ -0,0 +1,1139 @@
+;******************************************************************************
+;* VP9 loop filter SIMD optimizations
+;*
+;* Copyright (C) 2013-2014 Clément Bœsch <u pkh me>
+;* Copyright (C) 2014 Ronald S. Bultje <rsbultje@gmail.com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+; shared byte constants provided by libavutil
+cextern pb_3
+cextern pb_80
+
+; local splatted byte constants used by the filter math below
+pb_4: times 16 db 0x04
+pb_10: times 16 db 0x10
+pb_40: times 16 db 0x40
+pb_81: times 16 db 0x81
+pb_f8: times 16 db 0xf8
+pb_fe: times 16 db 0xfe
+pb_ff: times 16 db 0xff
+
+cextern pw_4
+cextern pw_8
+
+; with mix functions, two 8-bit thresholds are stored in a 16-bit storage,
+; the following mask is used to splat both in the same register
+mask_mix: times 8 db 0
+ times 8 db 1
+
+; half/half masks for selecting between the two mixed thresholds
+mask_mix84: times 8 db 0xff
+ times 8 db 0x00
+mask_mix48: times 8 db 0x00
+ times 8 db 0xff
+
+SECTION .text
+
+; Spill/restore helpers: on x86-64 they just SWAP into the extra xmm
+; registers m8-m15; on x86-32 they go through the given stack slot.
+%macro SCRATCH 3
+%if ARCH_X86_64
+ SWAP %1, %2
+%else
+ mova [%3], m%1
+%endif
+%endmacro
+
+%macro UNSCRATCH 3
+%if ARCH_X86_64
+ SWAP %1, %2
+%else
+ mova m%1, [%3]
+%endif
+%endmacro
+
+; %1 = abs(%2-%3)
+; (unsigned saturated subtract in both directions, then OR)
+%macro ABSSUB 4 ; dst, src1 (RO), src2 (RO), tmp
+%if ARCH_X86_64
+ psubusb %1, %3, %2
+ psubusb %4, %2, %3
+%else
+ mova %1, %3
+ mova %4, %2
+ psubusb %1, %2
+ psubusb %4, %3
+%endif
+ por %1, %4
+%endmacro
+
+; %1 = %1>%2
+; (optional third arg XORs with pb_80 first to turn the unsigned compare
+; into a signed pcmpgtb)
+%macro CMP_GT 2-3 ; src/dst, cmp, pb_80
+%if %0 == 3
+ pxor %1, %3
+%endif
+ pcmpgtb %1, %2
+%endmacro
+
+; %1 = abs(%2-%3) > %4
+%macro ABSSUB_GT 5-6 [pb_80]; dst, src1, src2, cmp, tmp, [pb_80]
+ ABSSUB %1, %2, %3, %5 ; dst = abs(src1-src2)
+ CMP_GT %1, %4, %6 ; dst > cmp
+%endmacro
+
+; Blend: dst = (new & mask) | (old & ~mask)
+%macro MASK_APPLY 4 ; %1=new_data/dst %2=old_data %3=mask %4=tmp
+ pand %1, %3 ; new &= mask
+ pandn %4, %3, %2 ; tmp = ~mask & old
+ por %1, %4 ; new&mask | old&~mask
+%endmacro
+
+; Non-destructive punpck wrapper (x86-32 lacks the 3-operand form).
+%macro UNPACK 4
+%if ARCH_X86_64
+ punpck%1bw %2, %3, %4
+%else
+ mova %2, %3
+ punpck%1bw %2, %4
+%endif
+%endmacro
+
+; Sliding-window update for the wide filters: subtract two outgoing taps
+; and add two incoming taps (all kept as words in rsp slots), optionally
+; unpacking a fresh source byte row for the second add, then shift.
+%macro FILTER_SUBx2_ADDx2 11 ; %1=dst %2=h/l %3=cache %4=stack_off %5=sub1 %6=sub2 %7=add1
+ ; %8=add2 %9=rshift, [unpack], [unpack_is_mem_on_x86_32]
+ psubw %3, [rsp+%4+%5*32]
+ psubw %3, [rsp+%4+%6*32]
+ paddw %3, [rsp+%4+%7*32]
+%ifnidn %10, ""
+%if %11 == 0
+ punpck%2bw %1, %10, m0
+%else
+ UNPACK %2, %1, %10, m0
+%endif
+ mova [rsp+%4+%8*32], %1
+ paddw %3, %1
+%else
+ paddw %3, [rsp+%4+%8*32]
+%endif
+ psraw %1, %3, %9
+%endmacro
+
+; FIXME interleave l/h better (for instruction pairing)
+; Compute the first output of a FILTER6/FILTER14 chain: run the *_INIT
+; macro on both halves, repack to bytes, mask-blend with the source.
+%macro FILTER_INIT 9 ; tmp1, tmp2, cacheL, cacheH, dstp, stack_off, filterid, mask, source
+ FILTER%7_INIT %1, l, %3, %6 + 0
+ FILTER%7_INIT %2, h, %4, %6 + 16
+ packuswb %1, %2
+ MASK_APPLY %1, %9, %8, %2
+ mova %5, %1
+%endmacro
+
+
+; Compute one subsequent filter output by sliding the tap window (see
+; FILTER_SUBx2_ADDx2), repacking, masking against %12 and storing to %5.
+%macro FILTER_UPDATE 12-16 "", "", "", 0 ; tmp1, tmp2, cacheL, cacheH, dstp, stack_off, -, -, +, +, rshift,
+ ; mask, [source], [unpack + src], [unpack_is_mem_on_x86_32]
+; FIXME interleave this properly with the subx2/addx2
+%ifnidn %15, ""
+%if %16 == 0 || ARCH_X86_64
+ mova %14, %15
+%endif
+%endif
+ FILTER_SUBx2_ADDx2 %1, l, %3, %6 + 0, %7, %8, %9, %10, %11, %14, %16
+ FILTER_SUBx2_ADDx2 %2, h, %4, %6 + 16, %7, %8, %9, %10, %11, %14, %16
+ packuswb %1, %2
+%ifnidn %13, ""
+ MASK_APPLY %1, %13, %12, %2
+%else
+ MASK_APPLY %1, %5, %12, %2
+%endif
+ mova %5, %1
+%endmacro
+
+; Signed >>3 on packed bytes for two registers at once: mask to the top 5
+; bits, logical shift, then sign-extend via the xor/sub trick with pb_10.
+%macro SRSHIFT3B_2X 4 ; reg1, reg2, [pb_10], tmp
+ mova %4, [pb_f8]
+ pand %1, %4
+ pand %2, %4
+ psrlq %1, 3
+ psrlq %2, 3
+ pxor %1, %3
+ pxor %2, %3
+ psubb %1, %3
+ psubb %2, %3
+%endmacro
+
+; Split packed signed bytes into magnitudes: %2 = -values (stored positive),
+; %3 = +values. Used to emulate signed byte add/sub with unsigned saturation.
+%macro EXTRACT_POS_NEG 3 ; i8, neg, pos
+ pxor %3, %3
+ pxor %2, %2
+ pcmpgtb %3, %1 ; i8 < 0 mask
+ psubb %2, %1 ; neg values (only the originally - will be kept)
+ pand %2, %3 ; negative values of i8 (but stored as +)
+ pandn %3, %1 ; positive values of i8
+%endmacro
+
+; clip_u8(u8 + i8)
+%macro SIGN_ADD 4 ; dst, u8, i8, tmp1
+ EXTRACT_POS_NEG %3, %4, %1
+ paddusb %1, %2 ; add the positives
+ psubusb %1, %4 ; sub the negatives
+%endmacro
+
+; clip_u8(u8 - i8)
+%macro SIGN_SUB 4 ; dst, u8, i8, tmp1
+ EXTRACT_POS_NEG %3, %1, %4
+ paddusb %1, %2 ; add the negatives
+ psubusb %1, %4 ; sub the positives
+%endmacro
+
+; First output of the 7-tap (flat8) filter: build the initial running sum
+; p3*3 + p2*2 + p1 + p0 + q0 + 4 while caching each unpacked word row in
+; rsp for the subsequent FILTER_UPDATE sliding-window steps.
+%macro FILTER6_INIT 4 ; %1=dst %2=h/l %3=cache, %4=stack_off
+ UNPACK %2, %1, rp3, m0 ; p3: B->W
+ mova [rsp+%4+0*32], %1
+ paddw %3, %1, %1 ; p3*2
+ paddw %3, %1 ; p3*3
+ punpck%2bw %1, m1, m0 ; p2: B->W
+ mova [rsp+%4+1*32], %1
+ paddw %3, %1 ; p3*3 + p2
+ paddw %3, %1 ; p3*3 + p2*2
+ UNPACK %2, %1, rp1, m0 ; p1: B->W
+ mova [rsp+%4+2*32], %1
+ paddw %3, %1 ; p3*3 + p2*2 + p1
+ UNPACK %2, %1, rp0, m0 ; p0: B->W
+ mova [rsp+%4+3*32], %1
+ paddw %3, %1 ; p3*3 + p2*2 + p1 + p0
+ UNPACK %2, %1, rq0, m0 ; q0: B->W
+ mova [rsp+%4+4*32], %1
+ paddw %3, %1 ; p3*3 + p2*2 + p1 + p0 + q0
+ paddw %3, [pw_4] ; p3*3 + p2*2 + p1 + p0 + q0 + 4
+ psraw %1, %3, 3 ; (p3*3 + p2*2 + p1 + p0 + q0 + 4) >> 3
+%endmacro
+
+; First output of the 15-tap (flat16) filter: initial running sum
+; p7*7 + p6*2 + p5..p0 + q0 + 8, reusing the p3..q0 word rows already
+; cached by FILTER6_INIT at slots 0-4.
+%macro FILTER14_INIT 4 ; %1=dst %2=h/l %3=cache, %4=stack_off
+ punpck%2bw %1, m2, m0 ; p7: B->W
+ mova [rsp+%4+ 8*32], %1
+ psllw %3, %1, 3 ; p7*8
+ psubw %3, %1 ; p7*7
+ punpck%2bw %1, m3, m0 ; p6: B->W
+ mova [rsp+%4+ 9*32], %1
+ paddw %3, %1 ; p7*7 + p6
+ paddw %3, %1 ; p7*7 + p6*2
+ UNPACK %2, %1, rp5, m0 ; p5: B->W
+ mova [rsp+%4+10*32], %1
+ paddw %3, %1 ; p7*7 + p6*2 + p5
+ UNPACK %2, %1, rp4, m0 ; p4: B->W
+ mova [rsp+%4+11*32], %1
+ paddw %3, %1 ; p7*7 + p6*2 + p5 + p4
+ paddw %3, [rsp+%4+ 0*32] ; p7*7 + p6*2 + p5 + p4 + p3
+ paddw %3, [rsp+%4+ 1*32] ; p7*7 + p6*2 + p5 + .. + p2
+ paddw %3, [rsp+%4+ 2*32] ; p7*7 + p6*2 + p5 + .. + p1
+ paddw %3, [rsp+%4+ 3*32] ; p7*7 + p6*2 + p5 + .. + p0
+ paddw %3, [rsp+%4+ 4*32] ; p7*7 + p6*2 + p5 + .. + p0 + q0
+ paddw %3, [pw_8] ; p7*7 + p6*2 + p5 + .. + p0 + q0 + 8
+ psraw %1, %3, 4 ; (p7*7 + p6*2 + p5 + .. + p0 + q0 + 8) >> 4
+%endmacro
+
+; 16x16 byte transpose across m%1..m%16 using SBUTTERFLY stages
+; (bw -> wd -> dq -> qdq); %17 is one memory slot used to juggle the
+; register that would otherwise be clobbered as the butterfly temp.
+; The trailing SWAPs put rows back into ascending order.
+%macro TRANSPOSE16x16B 17
+ mova %17, m%16
+ SBUTTERFLY bw, %1, %2, %16
+ SBUTTERFLY bw, %3, %4, %16
+ SBUTTERFLY bw, %5, %6, %16
+ SBUTTERFLY bw, %7, %8, %16
+ SBUTTERFLY bw, %9, %10, %16
+ SBUTTERFLY bw, %11, %12, %16
+ SBUTTERFLY bw, %13, %14, %16
+ mova m%16, %17
+ mova %17, m%14
+ SBUTTERFLY bw, %15, %16, %14
+ SBUTTERFLY wd, %1, %3, %14
+ SBUTTERFLY wd, %2, %4, %14
+ SBUTTERFLY wd, %5, %7, %14
+ SBUTTERFLY wd, %6, %8, %14
+ SBUTTERFLY wd, %9, %11, %14
+ SBUTTERFLY wd, %10, %12, %14
+ SBUTTERFLY wd, %13, %15, %14
+ mova m%14, %17
+ mova %17, m%12
+ SBUTTERFLY wd, %14, %16, %12
+ SBUTTERFLY dq, %1, %5, %12
+ SBUTTERFLY dq, %2, %6, %12
+ SBUTTERFLY dq, %3, %7, %12
+ SBUTTERFLY dq, %4, %8, %12
+ SBUTTERFLY dq, %9, %13, %12
+ SBUTTERFLY dq, %10, %14, %12
+ SBUTTERFLY dq, %11, %15, %12
+ mova m%12, %17
+ mova %17, m%8
+ SBUTTERFLY dq, %12, %16, %8
+ SBUTTERFLY qdq, %1, %9, %8
+ SBUTTERFLY qdq, %2, %10, %8
+ SBUTTERFLY qdq, %3, %11, %8
+ SBUTTERFLY qdq, %4, %12, %8
+ SBUTTERFLY qdq, %5, %13, %8
+ SBUTTERFLY qdq, %6, %14, %8
+ SBUTTERFLY qdq, %7, %15, %8
+ mova m%8, %17
+ mova %17, m%1
+ SBUTTERFLY qdq, %8, %16, %1
+ mova m%1, %17
+ SWAP %2, %9
+ SWAP %3, %5
+ SWAP %4, %13
+ SWAP %6, %11
+ SWAP %8, %15
+ SWAP %12, %14
+%endmacro
+
+; 8x8 byte transpose for the x86-32 path: %9/%10 load the 8th input row
+; (movdq[a/u] chosen by %10), %11 is a memory spill slot, and the result's
+; second row is written out early through %12/%13 to free a register.
+%macro TRANSPOSE8x8B 13
+ SBUTTERFLY bw, %1, %2, %7
+ movdq%10 m%7, %9
+ movdqa %11, m%2
+ SBUTTERFLY bw, %3, %4, %2
+ SBUTTERFLY bw, %5, %6, %2
+ SBUTTERFLY bw, %7, %8, %2
+ SBUTTERFLY wd, %1, %3, %2
+ movdqa m%2, %11
+ movdqa %11, m%3
+ SBUTTERFLY wd, %2, %4, %3
+ SBUTTERFLY wd, %5, %7, %3
+ SBUTTERFLY wd, %6, %8, %3
+ SBUTTERFLY dq, %1, %5, %3
+ SBUTTERFLY dq, %2, %6, %3
+ movdqa m%3, %11
+ movh %12, m%2
+ movhps %13, m%2
+ SBUTTERFLY dq, %3, %7, %2
+ SBUTTERFLY dq, %4, %8, %2
+ SWAP %2, %5
+ SWAP %4, %7
+%endmacro
+
+; Map P7..Q7 to their in-frame addresses: dstq points 4 rows above the
+; edge (so P3 sits at dstq) and dst2q 8 rows below it; %1 is an optional
+; byte offset (used for the second 8-pixel half on x86-32).
+%macro DEFINE_REAL_P7_TO_Q7 0-1 0
+%define P7 dstq + 4*mstrideq + %1
+%define P6 dstq + mstride3q + %1
+%define P5 dstq + 2*mstrideq + %1
+%define P4 dstq + mstrideq + %1
+%define P3 dstq + %1
+%define P2 dstq + strideq + %1
+%define P1 dstq + 2* strideq + %1
+%define P0 dstq + stride3q + %1
+%define Q0 dstq + 4* strideq + %1
+%define Q1 dst2q + mstride3q + %1
+%define Q2 dst2q + 2*mstrideq + %1
+%define Q3 dst2q + mstrideq + %1
+%define Q4 dst2q + %1
+%define Q5 dst2q + strideq + %1
+%define Q6 dst2q + 2* strideq + %1
+%define Q7 dst2q + stride3q + %1
+%endmacro
+
+; Map P7..Q7 to 16-byte stack slots holding the transposed pixel rows
+; (horizontal-edge filtering works on this buffer instead of the frame).
+; The inner P3..Q3 rows come first since all filter sizes use them.
+%macro DEFINE_TRANSPOSED_P7_TO_Q7 0-1 0
+%define P3 rsp + 0 + %1
+%define P2 rsp + 16 + %1
+%define P1 rsp + 32 + %1
+%define P0 rsp + 48 + %1
+%define Q0 rsp + 64 + %1
+%define Q1 rsp + 80 + %1
+%define Q2 rsp + 96 + %1
+%define Q3 rsp + 112 + %1
+%define P7 rsp + 128 + %1
+%define P6 rsp + 144 + %1
+%define P5 rsp + 160 + %1
+%define P4 rsp + 176 + %1
+%define Q4 rsp + 192 + %1
+%define Q5 rsp + 208 + %1
+%define Q6 rsp + 224 + %1
+%define Q7 rsp + 240 + %1
+%endmacro
+
+; ..............AB -> AAAAAAAABBBBBBBB
+; Splat the two packed threshold bytes across the register: pshufb with
+; mask_mix on ssse3, or a punpck cascade on plain sse2.
+%macro SPLATB_MIX 1-2 [mask_mix]
+%if cpuflag(ssse3)
+ pshufb %1, %2
+%else
+ punpcklbw %1, %1
+ punpcklwd %1, %1
+ punpckldq %1, %1
+%endif
+%endmacro
+
+%macro LOOPFILTER 5 ; %1=v/h %2=size1 %3+%4=stack, %5=32bit stack only
+%if UNIX64
+cglobal vp9_loop_filter_%1_%2_16, 5, 9, 16, %3 + %4, dst, stride, E, I, H, mstride, dst2, stride3, mstride3
+%else
+%if WIN64
+cglobal vp9_loop_filter_%1_%2_16, 4, 8, 16, %3 + %4, dst, stride, E, I, mstride, dst2, stride3, mstride3
+%else
+cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, dst, stride, mstride, dst2, stride3, mstride3
+%define Ed dword r2m
+%define Id dword r3m
+%endif
+%define Hd dword r4m
+%endif
+
+ mov mstrideq, strideq
+ neg mstrideq
+
+ lea stride3q, [strideq*3]
+ lea mstride3q, [mstrideq*3]
+
+%ifidn %1, h
+%if %2 > 16
+%define movx movh
+ lea dstq, [dstq + 4*strideq - 4]
+%else
+%define movx movu
+ lea dstq, [dstq + 4*strideq - 8] ; go from top center (h pos) to center left (v pos)
+%endif
+ lea dst2q, [dstq + 8*strideq]
+%else
+ lea dstq, [dstq + 4*mstrideq]
+ lea dst2q, [dstq + 8*strideq]
+%endif
+
+ DEFINE_REAL_P7_TO_Q7
+
+%ifidn %1, h
+ movx m0, [P7]
+ movx m1, [P6]
+ movx m2, [P5]
+ movx m3, [P4]
+ movx m4, [P3]
+ movx m5, [P2]
+%if ARCH_X86_64 || %2 != 16
+ movx m6, [P1]
+%endif
+ movx m7, [P0]
+%if ARCH_X86_64
+ movx m8, [Q0]
+ movx m9, [Q1]
+ movx m10, [Q2]
+ movx m11, [Q3]
+ movx m12, [Q4]
+ movx m13, [Q5]
+ movx m14, [Q6]
+ movx m15, [Q7]
+ DEFINE_TRANSPOSED_P7_TO_Q7
+%if %2 == 16
+ TRANSPOSE16x16B 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, [rsp]
+ mova [P7], m0
+ mova [P6], m1
+ mova [P5], m2
+ mova [P4], m3
+%else ; %2 == 44/48/84/88
+ ; 8x16 transpose
+ punpcklbw m0, m1
+ punpcklbw m2, m3
+ punpcklbw m4, m5
+ punpcklbw m6, m7
+ punpcklbw m8, m9
+ punpcklbw m10, m11
+ punpcklbw m12, m13
+ punpcklbw m14, m15
+ TRANSPOSE8x8W 0, 2, 4, 6, 8, 10, 12, 14, 15
+ SWAP 0, 4
+ SWAP 2, 5
+ SWAP 0, 6
+ SWAP 0, 7
+ SWAP 10, 9
+ SWAP 12, 10
+ SWAP 14, 11
+%endif ; %2
+ mova [P3], m4
+ mova [P2], m5
+ mova [P1], m6
+ mova [P0], m7
+ mova [Q0], m8
+ mova [Q1], m9
+ mova [Q2], m10
+ mova [Q3], m11
+%if %2 == 16
+ mova [Q4], m12
+ mova [Q5], m13
+ mova [Q6], m14
+ mova [Q7], m15
+%endif ; %2
+%else ; x86-32
+%if %2 == 16
+ TRANSPOSE8x8B 0, 1, 2, 3, 4, 5, 6, 7, [P1], u, [rsp+%3+%4], [rsp+64], [rsp+80]
+ DEFINE_TRANSPOSED_P7_TO_Q7
+ movh [P7], m0
+ movh [P5], m1
+ movh [P3], m2
+ movh [P1], m3
+ movh [Q2], m5
+ movh [Q4], m6
+ movh [Q6], m7
+ movhps [P6], m0
+ movhps [P4], m1
+ movhps [P2], m2
+ movhps [P0], m3
+ movhps [Q3], m5
+ movhps [Q5], m6
+ movhps [Q7], m7
+ DEFINE_REAL_P7_TO_Q7
+ movx m0, [Q0]
+ movx m1, [Q1]
+ movx m2, [Q2]
+ movx m3, [Q3]
+ movx m4, [Q4]
+ movx m5, [Q5]
+ movx m7, [Q7]
+ TRANSPOSE8x8B 0, 1, 2, 3, 4, 5, 6, 7, [Q6], u, [rsp+%3+%4], [rsp+72], [rsp+88]
+ DEFINE_TRANSPOSED_P7_TO_Q7 8
+ movh [P7], m0
+ movh [P5], m1
+ movh [P3], m2
+ movh [P1], m3
+ movh [Q2], m5
+ movh [Q4], m6
+ movh [Q6], m7
+ movhps [P6], m0
+ movhps [P4], m1
+ movhps [P2], m2
+ movhps [P0], m3
+ movhps [Q3], m5
+ movhps [Q5], m6
+ movhps [Q7], m7
+ DEFINE_TRANSPOSED_P7_TO_Q7
+%else ; %2 == 44/48/84/88
+ punpcklbw m0, m1
+ punpcklbw m2, m3
+ punpcklbw m4, m5
+ punpcklbw m6, m7
+ movx m1, [Q0]
+ movx m3, [Q1]
+ movx m5, [Q2]
+ movx m7, [Q3]
+ punpcklbw m1, m3
+ punpcklbw m5, m7
+ movx m3, [Q4]
+ movx m7, [Q5]
+ punpcklbw m3, m7
+ mova [rsp], m3
+ movx m3, [Q6]
+ movx m7, [Q7]
+ punpcklbw m3, m7
+ DEFINE_TRANSPOSED_P7_TO_Q7
+ TRANSPOSE8x8W 0, 2, 4, 6, 1, 5, 7, 3, [rsp], [Q0], 1
+ mova [P3], m0
+ mova [P2], m2
+ mova [P1], m4
+ mova [P0], m6
+ mova [Q1], m5
+ mova [Q2], m7
+ mova [Q3], m3
+%endif ; %2
+%endif ; x86-32/64
+%endif ; %1 == h
+
+ ; calc fm mask
+%if %2 == 16
+%if cpuflag(ssse3)
+ pxor m0, m0
+%endif
+ SPLATB_REG m2, I, m0 ; I I I I ...
+ SPLATB_REG m3, E, m0 ; E E E E ...
+%else
+%if cpuflag(ssse3)
+ mova m0, [mask_mix]
+%endif
+ movd m2, Id
+ movd m3, Ed
+ SPLATB_MIX m2, m0
+ SPLATB_MIX m3, m0
+%endif
+ mova m0, [pb_80]
+ pxor m2, m0
+ pxor m3, m0
+%if ARCH_X86_64
+%ifidn %1, v
+ mova m8, [P3]
+ mova m9, [P2]
+ mova m10, [P1]
+ mova m11, [P0]
+ mova m12, [Q0]
+ mova m13, [Q1]
+ mova m14, [Q2]
+ mova m15, [Q3]
+%else
+ ; In case of horizontal, P3..Q3 are already present in some registers due
+ ; to the previous transpose, so we just swap registers.
+ SWAP 8, 4, 12
+ SWAP 9, 5, 13
+ SWAP 10, 6, 14
+ SWAP 11, 7, 15
+%endif
+%define rp3 m8
+%define rp2 m9
+%define rp1 m10
+%define rp0 m11
+%define rq0 m12
+%define rq1 m13
+%define rq2 m14
+%define rq3 m15
+%else
+%define rp3 [P3]
+%define rp2 [P2]
+%define rp1 [P1]
+%define rp0 [P0]
+%define rq0 [Q0]
+%define rq1 [Q1]
+%define rq2 [Q2]
+%define rq3 [Q3]
+%endif
+ ABSSUB_GT m5, rp3, rp2, m2, m7, m0 ; m5 = abs(p3-p2) <= I
+ ABSSUB_GT m1, rp2, rp1, m2, m7, m0 ; m1 = abs(p2-p1) <= I
+ por m5, m1
+ ABSSUB_GT m1, rp1, rp0, m2, m7, m0 ; m1 = abs(p1-p0) <= I
+ por m5, m1
+ ABSSUB_GT m1, rq0, rq1, m2, m7, m0 ; m1 = abs(q1-q0) <= I
+ por m5, m1
+ ABSSUB_GT m1, rq1, rq2, m2, m7, m0 ; m1 = abs(q2-q1) <= I
+ por m5, m1
+ ABSSUB_GT m1, rq2, rq3, m2, m7, m0 ; m1 = abs(q3-q2) <= I
+ por m5, m1
+ ABSSUB m1, rp0, rq0, m7 ; abs(p0-q0)
+ paddusb m1, m1 ; abs(p0-q0) * 2
+ ABSSUB m2, rp1, rq1, m7 ; abs(p1-q1)
+ pand m2, [pb_fe] ; drop lsb so shift can work
+ psrlq m2, 1 ; abs(p1-q1)/2
+ paddusb m1, m2 ; abs(p0-q0)*2 + abs(p1-q1)/2
+ pxor m1, m0
+ pcmpgtb m1, m3
+ por m1, m5 ; fm final value
+ SWAP 1, 3
+ pxor m3, [pb_ff]
+
+ ; (m3: fm, m8..15: p3 p2 p1 p0 q0 q1 q2 q3)
+ ; calc flat8in (if not 44_16) and hev masks
+%if %2 != 44
+ mova m6, [pb_81] ; [1 1 1 1 ...] ^ 0x80
+ ABSSUB_GT m2, rp3, rp0, m6, m5 ; abs(p3 - p0) <= 1
+%if ARCH_X86_64
+ mova m8, [pb_80]
+%define rb80 m8
+%else
+%define rb80 [pb_80]
+%endif
+ ABSSUB_GT m1, rp2, rp0, m6, m5, rb80 ; abs(p2 - p0) <= 1
+ por m2, m1
+ ABSSUB m4, rp1, rp0, m5 ; abs(p1 - p0)
+%if %2 == 16
+%if cpuflag(ssse3)
+ pxor m0, m0
+%endif
+ SPLATB_REG m7, H, m0 ; H H H H ...
+%else
+ movd m7, Hd
+ SPLATB_MIX m7
+%endif
+ pxor m7, rb80
+ pxor m4, rb80
+ pcmpgtb m0, m4, m7 ; abs(p1 - p0) > H (1/2 hev condition)
+ CMP_GT m4, m6 ; abs(p1 - p0) <= 1
+ por m2, m4 ; (flat8in)
+ ABSSUB m4, rq1, rq0, m1 ; abs(q1 - q0)
+ pxor m4, rb80
+ pcmpgtb m5, m4, m7 ; abs(q1 - q0) > H (2/2 hev condition)
+ por m0, m5 ; hev final value
+ CMP_GT m4, m6 ; abs(q1 - q0) <= 1
+ por m2, m4 ; (flat8in)
+ ABSSUB_GT m1, rq2, rq0, m6, m5, rb80 ; abs(q2 - q0) <= 1
+ por m2, m1
+ ABSSUB_GT m1, rq3, rq0, m6, m5, rb80 ; abs(q3 - q0) <= 1
+ por m2, m1 ; flat8in final value
+ pxor m2, [pb_ff]
+%if %2 == 84 || %2 == 48
+ pand m2, [mask_mix%2]
+%endif
+%else
+ mova m6, [pb_80]
+ movd m7, Hd
+ SPLATB_MIX m7
+ pxor m7, m6
+ ABSSUB m4, rp1, rp0, m1 ; abs(p1 - p0)
+ pxor m4, m6
+ pcmpgtb m0, m4, m7 ; abs(p1 - p0) > H (1/2 hev condition)
+ ABSSUB m4, rq1, rq0, m1 ; abs(q1 - q0)
+ pxor m4, m6
+ pcmpgtb m5, m4, m7 ; abs(q1 - q0) > H (2/2 hev condition)
+ por m0, m5 ; hev final value
+%endif
+
+%if %2 == 16
+ ; (m0: hev, m2: flat8in, m3: fm, m6: pb_81, m9..15: p2 p1 p0 q0 q1 q2 q3)
+ ; calc flat8out mask
+%if ARCH_X86_64
+ mova m8, [P7]
+ mova m9, [P6]
+%define rp7 m8
+%define rp6 m9
+%else
+%define rp7 [P7]
+%define rp6 [P6]
+%endif
+ ABSSUB_GT m1, rp7, rp0, m6, m5 ; abs(p7 - p0) <= 1
+ ABSSUB_GT m7, rp6, rp0, m6, m5 ; abs(p6 - p0) <= 1
+ por m1, m7
+%if ARCH_X86_64
+ mova m8, [P5]
+ mova m9, [P4]
+%define rp5 m8
+%define rp4 m9
+%else
+%define rp5 [P5]
+%define rp4 [P4]
+%endif
+ ABSSUB_GT m7, rp5, rp0, m6, m5 ; abs(p5 - p0) <= 1
+ por m1, m7
+ ABSSUB_GT m7, rp4, rp0, m6, m5 ; abs(p4 - p0) <= 1
+ por m1, m7
+%if ARCH_X86_64
+ mova m14, [Q4]
+ mova m15, [Q5]
+%define rq4 m14
+%define rq5 m15
+%else
+%define rq4 [Q4]
+%define rq5 [Q5]
+%endif
+ ABSSUB_GT m7, rq4, rq0, m6, m5 ; abs(q4 - q0) <= 1
+ por m1, m7
+ ABSSUB_GT m7, rq5, rq0, m6, m5 ; abs(q5 - q0) <= 1
+ por m1, m7
+%if ARCH_X86_64
+ mova m14, [Q6]
+ mova m15, [Q7]
+%define rq6 m14
+%define rq7 m15
+%else
+%define rq6 [Q6]
+%define rq7 [Q7]
+%endif
+ ABSSUB_GT m7, rq6, rq0, m6, m5 ; abs(q4 - q0) <= 1
+ por m1, m7
+ ABSSUB_GT m7, rq7, rq0, m6, m5 ; abs(q5 - q0) <= 1
+ por m1, m7 ; flat8out final value
+ pxor m1, [pb_ff]
+%endif
+
+ ; if (fm) {
+ ; if (out && in) filter_14()
+ ; else if (in) filter_6()
+ ; else if (hev) filter_2()
+ ; else filter_4()
+ ; }
+ ;
+ ; f14: fm & out & in
+ ; f6: fm & ~f14 & in => fm & ~(out & in) & in => fm & ~out & in
+ ; f2: fm & ~f14 & ~f6 & hev => fm & ~(out & in) & ~(~out & in) & hev => fm & ~in & hev
+ ; f4: fm & ~f14 & ~f6 & ~f2 => fm & ~(out & in) & ~(~out & in) & ~(~in & hev) => fm & ~in & ~hev
+
+ ; (m0: hev, [m1: flat8out], [m2: flat8in], m3: fm, m8..15: p5 p4 p1 p0 q0 q1 q6 q7)
+ ; filter2()
+%if %2 != 44
+ mova m6, [pb_80] ; already in m6 if 44_16
+ SCRATCH 2, 15, rsp+%3+%4
+%if %2 == 16
+ SCRATCH 1, 8, rsp+%3+%4+16
+%endif
+%endif
+ pxor m2, m6, rq0 ; q0 ^ 0x80
+ pxor m4, m6, rp0 ; p0 ^ 0x80
+ psubsb m2, m4 ; (signed) q0 - p0
+ pxor m4, m6, rp1 ; p1 ^ 0x80
+ pxor m5, m6, rq1 ; q1 ^ 0x80
+ psubsb m4, m5 ; (signed) p1 - q1
+ paddsb m4, m2 ; (q0 - p0) + (p1 - q1)
+ paddsb m4, m2 ; 2*(q0 - p0) + (p1 - q1)
+ paddsb m4, m2 ; 3*(q0 - p0) + (p1 - q1)
+ paddsb m6, m4, [pb_4] ; m6: f1 = clip(f + 4, 127)
+ paddsb m4, [pb_3] ; m4: f2 = clip(f + 3, 127)
+%if ARCH_X86_64
+ mova m14, [pb_10] ; will be reused in filter4()
+%define rb10 m14
+%else
+%define rb10 [pb_10]
+%endif
+ SRSHIFT3B_2X m6, m4, rb10, m7 ; f1 and f2 sign byte shift by 3
+ SIGN_SUB m7, rq0, m6, m5 ; m7 = q0 - f1
+ SIGN_ADD m1, rp0, m4, m5 ; m1 = p0 + f2
+%if %2 != 44
+%if ARCH_X86_64
+ pandn m6, m15, m3 ; ~mask(in) & mask(fm)
+%else
+ mova m6, [rsp+%3+%4]
+ pandn m6, m3
+%endif
+ pand m6, m0 ; (~mask(in) & mask(fm)) & mask(hev)
+%else
+ pand m6, m3, m0
+%endif
+ MASK_APPLY m7, rq0, m6, m5 ; m7 = filter2(q0) & mask / we write it in filter4()
+ MASK_APPLY m1, rp0, m6, m5 ; m1 = filter2(p0) & mask / we write it in filter4()
+
+ ; (m0: hev, m1: p0', m2: q0-p0, m3: fm, m7: q0', [m8: flat8out], m10..13: p1 p0 q0 q1, m14: pb_10, [m15: flat8in], )
+ ; filter4()
+ mova m4, m2
+ paddsb m2, m4 ; 2 * (q0 - p0)
+ paddsb m2, m4 ; 3 * (q0 - p0)
+ paddsb m6, m2, [pb_4] ; m6: f1 = clip(f + 4, 127)
+ paddsb m2, [pb_3] ; m2: f2 = clip(f + 3, 127)
+ SRSHIFT3B_2X m6, m2, rb10, m4 ; f1 and f2 sign byte shift by 3
+%if %2 != 44
+%if ARCH_X86_64
+ pandn m5, m15, m3 ; ~mask(in) & mask(fm)
+%else
+ mova m5, [rsp+%3+%4]
+ pandn m5, m3
+%endif
+ pandn m0, m5 ; ~mask(hev) & (~mask(in) & mask(fm))
+%else
+ pandn m0, m3
+%endif
+ SIGN_SUB m5, rq0, m6, m4 ; q0 - f1
+ MASK_APPLY m5, m7, m0, m4 ; filter4(q0) & mask
+ mova [Q0], m5
+ SIGN_ADD m7, rp0, m2, m4 ; p0 + f2
+ MASK_APPLY m7, m1, m0, m4 ; filter4(p0) & mask
+ mova [P0], m7
+ paddb m6, [pb_80] ;
+ pxor m1, m1 ; f=(f1+1)>>1
+ pavgb m6, m1 ;
+ psubb m6, [pb_40] ;
+ SIGN_ADD m1, rp1, m6, m2 ; p1 + f
+ SIGN_SUB m4, rq1, m6, m2 ; q1 - f
+ MASK_APPLY m1, rp1, m0, m2 ; m1 = filter4(p1)
+ MASK_APPLY m4, rq1, m0, m2 ; m4 = filter4(q1)
+ mova [P1], m1
+ mova [Q1], m4
+
+%if %2 != 44
+ UNSCRATCH 2, 15, rsp+%3+%4
+%endif
+
+ ; ([m1: flat8out], m2: flat8in, m3: fm, m10..13: p1 p0 q0 q1)
+ ; filter6()
+%if %2 != 44
+ pxor m0, m0
+%if %2 > 16
+ pand m3, m2
+%else
+ pand m2, m3 ; mask(fm) & mask(in)
+%if ARCH_X86_64
+ pandn m3, m8, m2 ; ~mask(out) & (mask(fm) & mask(in))
+%else
+ mova m3, [rsp+%3+%4+16]
+ pandn m3, m2
+%endif
+%endif
+%if ARCH_X86_64
+ mova m14, [P3]
+ mova m9, [Q3]
+%define rp3 m14
+%define rq3 m9
+%else
+%define rp3 [P3]
+%define rq3 [Q3]
+%endif
+ mova m1, [P2]
+ FILTER_INIT m4, m5, m6, m7, [P2], %4, 6, m3, m1 ; [p2]
+ mova m1, [Q2]
+ FILTER_UPDATE m4, m5, m6, m7, [P1], %4, 0, 1, 2, 5, 3, m3, "", rq1, "", 1 ; [p1] -p3 -p2 +p1 +q1
+ FILTER_UPDATE m4, m5, m6, m7, [P0], %4, 0, 2, 3, 6, 3, m3, "", m1 ; [p0] -p3 -p1 +p0 +q2
+ FILTER_UPDATE m4, m5, m6, m7, [Q0], %4, 0, 3, 4, 7, 3, m3, "", rq3, "", 1 ; [q0] -p3 -p0 +q0 +q3
+ FILTER_UPDATE m4, m5, m6, m7, [Q1], %4, 1, 4, 5, 7, 3, m3, "" ; [q1] -p2 -q0 +q1 +q3
+ FILTER_UPDATE m4, m5, m6, m7, [Q2], %4, 2, 5, 6, 7, 3, m3, m1 ; [q2] -p1 -q1 +q2 +q3
+%endif
+
+%if %2 == 16
+ UNSCRATCH 1, 8, rsp+%3+%4+16
+%endif
+
+ ; (m0: 0, [m1: flat8out], m2: fm & flat8in, m8..15: q2 q3 p1 p0 q0 q1 p3 p2)
+ ; filter14()
+ ;
+ ; m2 m3 m8 m9 m14 m15 m10 m11 m12 m13
+ ;
+ ; q2 q3 p3 p2 p1 p0 q0 q1
+ ; p6 -7 p7 p6 p5 p4 . . . . .
+ ; p5 -6 -p7 -p6 +p5 +q1 . . . .
+ ; p4 -5 -p7 -p5 +p4 +q2 . . . q2
+ ; p3 -4 -p7 -p4 +p3 +q3 . . . q3
+ ; p2 -3 -p7 -p3 +p2 +q4 . . . q4
+ ; p1 -2 -p7 -p2 +p1 +q5 . . . q5
+ ; p0 -1 -p7 -p1 +p0 +q6 . . . q6
+ ; q0 +0 -p7 -p0 +q0 +q7 . . . q7
+ ; q1 +1 -p6 -q0 +q1 +q7 q1 . . .
+ ; q2 +2 -p5 -q1 +q2 +q7 . q2 . .
+ ; q3 +3 -p4 -q2 +q3 +q7 . q3 . .
+ ; q4 +4 -p3 -q3 +q4 +q7 . q4 . .
+ ; q5 +5 -p2 -q4 +q5 +q7 . q5 . .
+ ; q6 +6 -p1 -q5 +q6 +q7 . q6 . .
+
+%if %2 == 16
+ pand m1, m2 ; mask(out) & (mask(fm) & mask(in))
+ mova m2, [P7]
+ mova m3, [P6]
+%if ARCH_X86_64
+ mova m8, [P5]
+ mova m9, [P4]
+%define rp5 m8
+%define rp4 m9
+%define rp5s m8
+%define rp4s m9
+%define rp3s m14
+%define rq4 m8
+%define rq5 m9
+%define rq6 m14
+%define rq7 m15
+%define rq4s m8
+%define rq5s m9
+%define rq6s m14
+%else
+%define rp5 [P5]
+%define rp4 [P4]
+%define rp5s ""
+%define rp4s ""
+%define rp3s ""
+%define rq4 [Q4]
+%define rq5 [Q5]
+%define rq6 [Q6]
+%define rq7 [Q7]
+%define rq4s ""
+%define rq5s ""
+%define rq6s ""
+%endif
+ FILTER_INIT m4, m5, m6, m7, [P6], %4, 14, m1, m3 ; [p6]
+ FILTER_UPDATE m4, m5, m6, m7, [P5], %4, 8, 9, 10, 5, 4, m1, rp5s ; [p5] -p7 -p6 +p5 +q1
+ FILTER_UPDATE m4, m5, m6, m7, [P4], %4, 8, 10, 11, 6, 4, m1, rp4s ; [p4] -p7 -p5 +p4 +q2
+ FILTER_UPDATE m4, m5, m6, m7, [P3], %4, 8, 11, 0, 7, 4, m1, rp3s ; [p3] -p7 -p4 +p3 +q3
+ FILTER_UPDATE m4, m5, m6, m7, [P2], %4, 8, 0, 1, 12, 4, m1, "", rq4, [Q4], 1 ; [p2] -p7 -p3 +p2 +q4
+ FILTER_UPDATE m4, m5, m6, m7, [P1], %4, 8, 1, 2, 13, 4, m1, "", rq5, [Q5], 1 ; [p1] -p7 -p2 +p1 +q5
+ FILTER_UPDATE m4, m5, m6, m7, [P0], %4, 8, 2, 3, 14, 4, m1, "", rq6, [Q6], 1 ; [p0] -p7 -p1 +p0 +q6
+ FILTER_UPDATE m4, m5, m6, m7, [Q0], %4, 8, 3, 4, 15, 4, m1, "", rq7, [Q7], 1 ; [q0] -p7 -p0 +q0 +q7
+ FILTER_UPDATE m4, m5, m6, m7, [Q1], %4, 9, 4, 5, 15, 4, m1, "" ; [q1] -p6 -q0 +q1 +q7
+ FILTER_UPDATE m4, m5, m6, m7, [Q2], %4, 10, 5, 6, 15, 4, m1, "" ; [q2] -p5 -q1 +q2 +q7
+ FILTER_UPDATE m4, m5, m6, m7, [Q3], %4, 11, 6, 7, 15, 4, m1, "" ; [q3] -p4 -q2 +q3 +q7
+ FILTER_UPDATE m4, m5, m6, m7, [Q4], %4, 0, 7, 12, 15, 4, m1, rq4s ; [q4] -p3 -q3 +q4 +q7
+ FILTER_UPDATE m4, m5, m6, m7, [Q5], %4, 1, 12, 13, 15, 4, m1, rq5s ; [q5] -p2 -q4 +q5 +q7
+ FILTER_UPDATE m4, m5, m6, m7, [Q6], %4, 2, 13, 14, 15, 4, m1, rq6s ; [q6] -p1 -q5 +q6 +q7
+%endif
+
+%ifidn %1, h
+%if %2 == 16
+ mova m0, [P7]
+ mova m1, [P6]
+ mova m2, [P5]
+ mova m3, [P4]
+ mova m4, [P3]
+ mova m5, [P2]
+%if ARCH_X86_64
+ mova m6, [P1]
+%endif
+ mova m7, [P0]
+%if ARCH_X86_64
+ mova m8, [Q0]
+ mova m9, [Q1]
+ mova m10, [Q2]
+ mova m11, [Q3]
+ mova m12, [Q4]
+ mova m13, [Q5]
+ mova m14, [Q6]
+ mova m15, [Q7]
+ TRANSPOSE16x16B 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, [rsp]
+ DEFINE_REAL_P7_TO_Q7
+ movu [P7], m0
+ movu [P6], m1
+ movu [P5], m2
+ movu [P4], m3
+ movu [P3], m4
+ movu [P2], m5
+ movu [P1], m6
+ movu [P0], m7
+ movu [Q0], m8
+ movu [Q1], m9
+ movu [Q2], m10
+ movu [Q3], m11
+ movu [Q4], m12
+ movu [Q5], m13
+ movu [Q6], m14
+ movu [Q7], m15
+%else
+ DEFINE_REAL_P7_TO_Q7
+ TRANSPOSE8x8B 0, 1, 2, 3, 4, 5, 6, 7, [rsp+32], a, [rsp+%3+%4], [Q0], [Q1]
+ movh [P7], m0
+ movh [P5], m1
+ movh [P3], m2
+ movh [P1], m3
+ movh [Q2], m5
+ movh [Q4], m6
+ movh [Q6], m7
+ movhps [P6], m0
+ movhps [P4], m1
+ movhps [P2], m2
+ movhps [P0], m3
+ movhps [Q3], m5
+ movhps [Q5], m6
+ movhps [Q7], m7
+ DEFINE_TRANSPOSED_P7_TO_Q7
+ mova m0, [Q0]
+ mova m1, [Q1]
+ mova m2, [Q2]
+ mova m3, [Q3]
+ mova m4, [Q4]
+ mova m5, [Q5]
+ mova m7, [Q7]
+ DEFINE_REAL_P7_TO_Q7 8
+ TRANSPOSE8x8B 0, 1, 2, 3, 4, 5, 6, 7, [rsp+224], a, [rsp+%3+%4], [Q0], [Q1]
+ movh [P7], m0
+ movh [P5], m1
+ movh [P3], m2
+ movh [P1], m3
+ movh [Q2], m5
+ movh [Q4], m6
+ movh [Q6], m7
+ movhps [P6], m0
+ movhps [P4], m1
+ movhps [P2], m2
+ movhps [P0], m3
+ movhps [Q3], m5
+ movhps [Q5], m6
+ movhps [Q7], m7
+%endif
+%elif %2 == 44
+ SWAP 0, 1 ; m0 = p1
+ SWAP 1, 7 ; m1 = p0
+ SWAP 2, 5 ; m2 = q0
+ SWAP 3, 4 ; m3 = q1
+ DEFINE_REAL_P7_TO_Q7 2
+ SBUTTERFLY bw, 0, 1, 4
+ SBUTTERFLY bw, 2, 3, 4
+ SBUTTERFLY wd, 0, 2, 4
+ SBUTTERFLY wd, 1, 3, 4
+ movd [P7], m0
+ movd [P3], m2
+ movd [Q0], m1
+ movd [Q4], m3
+ psrldq m0, 4
+ psrldq m1, 4
+ psrldq m2, 4
+ psrldq m3, 4
+ movd [P6], m0
+ movd [P2], m2
+ movd [Q1], m1
+ movd [Q5], m3
+ psrldq m0, 4
+ psrldq m1, 4
+ psrldq m2, 4
+ psrldq m3, 4
+ movd [P5], m0
+ movd [P1], m2
+ movd [Q2], m1
+ movd [Q6], m3
+ psrldq m0, 4
+ psrldq m1, 4
+ psrldq m2, 4
+ psrldq m3, 4
+ movd [P4], m0
+ movd [P0], m2
+ movd [Q3], m1
+ movd [Q7], m3
+%else
+ ; the following code does a transpose of 8 full lines to 16 half
+ ; lines (high part). It is inlined to avoid the need of a staging area
+ mova m0, [P3]
+ mova m1, [P2]
+ mova m2, [P1]
+ mova m3, [P0]
+ mova m4, [Q0]
+ mova m5, [Q1]
+%if ARCH_X86_64
+ mova m6, [Q2]
+%endif
+ mova m7, [Q3]
+ DEFINE_REAL_P7_TO_Q7
+%if ARCH_X86_64
+ SBUTTERFLY bw, 0, 1, 8
+ SBUTTERFLY bw, 2, 3, 8
+ SBUTTERFLY bw, 4, 5, 8
+ SBUTTERFLY bw, 6, 7, 8
+ SBUTTERFLY wd, 0, 2, 8
+ SBUTTERFLY wd, 1, 3, 8
+ SBUTTERFLY wd, 4, 6, 8
+ SBUTTERFLY wd, 5, 7, 8
+ SBUTTERFLY dq, 0, 4, 8
+ SBUTTERFLY dq, 1, 5, 8
+ SBUTTERFLY dq, 2, 6, 8
+ SBUTTERFLY dq, 3, 7, 8
+%else
+ SBUTTERFLY bw, 0, 1, 6
+ mova [rsp+64], m1
+ mova m6, [rsp+96]
+ SBUTTERFLY bw, 2, 3, 1
+ SBUTTERFLY bw, 4, 5, 1
+ SBUTTERFLY bw, 6, 7, 1
+ SBUTTERFLY wd, 0, 2, 1
+ mova [rsp+96], m2
+ mova m1, [rsp+64]
+ SBUTTERFLY wd, 1, 3, 2
+ SBUTTERFLY wd, 4, 6, 2
+ SBUTTERFLY wd, 5, 7, 2
+ SBUTTERFLY dq, 0, 4, 2
+ SBUTTERFLY dq, 1, 5, 2
+ movh [Q0], m1
+ movhps [Q1], m1
+ mova m2, [rsp+96]
+ SBUTTERFLY dq, 2, 6, 1
+ SBUTTERFLY dq, 3, 7, 1
+%endif
+ SWAP 3, 6
+ SWAP 1, 4
+ movh [P7], m0
+ movhps [P6], m0
+ movh [P5], m1
+ movhps [P4], m1
+ movh [P3], m2
+ movhps [P2], m2
+ movh [P1], m3
+ movhps [P0], m3
+%if ARCH_X86_64
+ movh [Q0], m4
+ movhps [Q1], m4
+%endif
+ movh [Q2], m5
+ movhps [Q3], m5
+ movh [Q4], m6
+ movhps [Q5], m6
+ movh [Q6], m7
+ movhps [Q7], m7
+%endif
+%endif
+
+ RET
+%endmacro
+
+%macro LPF_16_VH 5 ; wd, stack_v, stack_h_extra, extra_off, cpuset: emit v+h variants of one width
+INIT_XMM %5
+LOOPFILTER v, %1, %2, 0, %4 ; vertical edge needs no transpose scratch area
+LOOPFILTER h, %1, %2, %3, %4 ; horizontal edge gets %3 extra stack for the transpose
+%endmacro
+
+%macro LPF_16_VH_ALL_OPTS 4 ; wd, stack_v, stack_h_extra, extra_off: emit sse2/ssse3/avx versions
+LPF_16_VH %1, %2, %3, %4, sse2
+LPF_16_VH %1, %2, %3, %4, ssse3
+LPF_16_VH %1, %2, %3, %4, avx
+%endmacro
+
+LPF_16_VH_ALL_OPTS 16, 512, 256, 32 ; instantiate every filter-width combination for all cpusets
+LPF_16_VH_ALL_OPTS 44, 0, 128, 0
+LPF_16_VH_ALL_OPTS 48, 256, 128, 16
+LPF_16_VH_ALL_OPTS 84, 256, 128, 16
+LPF_16_VH_ALL_OPTS 88, 256, 128, 16
diff --git a/media/ffvpx/libavcodec/x86/vp9lpf_16bpp.asm b/media/ffvpx/libavcodec/x86/vp9lpf_16bpp.asm
new file mode 100644
index 000000000..c15437b8b
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp9lpf_16bpp.asm
@@ -0,0 +1,823 @@
+;******************************************************************************
+;* VP9 loop filter SIMD optimizations
+;*
+;* Copyright (C) 2015 Ronald S. Bultje <rsbultje@gmail.com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+pw_511: times 16 dw 511 ; signed clip max for 10bpp deltas (2^9-1), see %%maxsgn
+pw_2047: times 16 dw 2047 ; signed clip max for 12bpp deltas (2^11-1)
+pw_16384: times 16 dw 16384 ; pmulhrsw by this computes (x+1)>>1
+pw_m512: times 16 dw -512 ; signed clip min for 10bpp deltas, see %%minsgn
+pw_m2048: times 16 dw -2048 ; signed clip min for 12bpp deltas
+
+cextern pw_1
+cextern pw_3
+cextern pw_4
+cextern pw_8
+cextern pw_16
+cextern pw_256
+cextern pw_1023
+cextern pw_4095
+cextern pw_m1
+
+SECTION .text
+
+%macro SCRATCH 3-4 ; xmm#, high-xmm# (x86-64), mem-slot, [symbolic name]
+%if ARCH_X86_64
+ SWAP %1, %2 ; x86-64 has 16 xmm regs: park the value in a high register
+%if %0 == 4
+%define reg_%4 m%2
+%endif
+%else
+ mova [%3], m%1 ; x86-32: spill to the given stack slot instead
+%if %0 == 4
+%define reg_%4 [%3]
+%endif
+%endif
+%endmacro
+
+%macro UNSCRATCH 3-4 ; xmm#, high-xmm# (x86-64), mem-slot, [symbolic name]
+%if ARCH_X86_64
+ SWAP %1, %2 ; restore from the high register used by SCRATCH
+%else
+ mova m%1, [%3] ; x86-32: reload from the stack slot
+%endif
+%if %0 == 4
+%undef reg_%4
+%endif
+%endmacro
+
+%macro PRELOAD 2-3 ; xmm#, mem, [symbolic name]
+%if ARCH_X86_64
+ mova m%1, [%2] ; keep a register-resident copy when registers are plentiful
+%if %0 == 3
+%define reg_%3 m%1
+%endif
+%elif %0 == 3
+%define reg_%3 [%2] ; x86-32: alias the memory operand directly, no load
+%endif
+%endmacro
+
+; calculate p or q portion of flat8out
+%macro FLAT8OUT_HALF 0 ; in: m0=x0, m4-7=x4-x7; out: m7=!flat8out (one side)
+ psubw m4, m0 ; q4-q0
+ psubw m5, m0 ; q5-q0
+ psubw m6, m0 ; q6-q0
+ psubw m7, m0 ; q7-q0
+ ABS2 m4, m5, m2, m3 ; abs(q4-q0) | abs(q5-q0)
+ ABS2 m6, m7, m2, m3 ; abs(q6-q0) | abs(q7-q0)
+ pcmpgtw m4, reg_F ; abs(q4-q0) > F
+ pcmpgtw m5, reg_F ; abs(q5-q0) > F
+ pcmpgtw m6, reg_F ; abs(q6-q0) > F
+ pcmpgtw m7, reg_F ; abs(q7-q0) > F
+ por m5, m4
+ por m7, m6
+ por m7, m5 ; !flat8out, q portion
+%endmacro
+
+; calculate p or q portion of flat8in/hev/fm (excluding mb_edge condition)
+%macro FLAT8IN_HALF 1 ; wd: for wd>4 also accumulates the flat8in (F) comparisons in m4
+%if %1 > 4
+ psubw m4, m3, m0 ; q3-q0
+ psubw m5, m2, m0 ; q2-q0
+ ABS2 m4, m5, m6, m7 ; abs(q3-q0) | abs(q2-q0)
+ pcmpgtw m4, reg_F ; abs(q3-q0) > F
+ pcmpgtw m5, reg_F ; abs(q2-q0) > F
+%endif
+ psubw m3, m2 ; q3-q2
+ psubw m2, m1 ; q2-q1
+ ABS2 m3, m2, m6, m7 ; abs(q3-q2) | abs(q2-q1)
+ pcmpgtw m3, reg_I ; abs(q3-q2) > I
+ pcmpgtw m2, reg_I ; abs(q2-q1) > I
+%if %1 > 4
+ por m4, m5
+%endif
+ por m2, m3 ; accumulate the fm (I) conditions in m2
+ psubw m3, m1, m0 ; q1-q0
+ ABS1 m3, m5 ; abs(q1-q0)
+%if %1 > 4
+ pcmpgtw m6, m3, reg_F ; abs(q1-q0) > F
+%endif
+ pcmpgtw m7, m3, reg_H ; abs(q1-q0) > H
+ pcmpgtw m3, reg_I ; abs(q1-q0) > I
+%if %1 > 4
+ por m4, m6
+%endif
+ por m2, m3 ; m2=!fm (partial), m7=hev, m4=!flat8in (wd>4)
+%endmacro
+
+; one step in filter_14/filter_6
+;
+; take sum $reg, downshift, apply mask and write into dst
+;
+; if sub2/add1-2 are present, add/sub as appropriate to prepare for the next
+; step's sum $reg. This is omitted for the last row in each filter.
+;
+; if dont_store is set, don't write the result into memory, instead keep the
+; values in register so we can write it out later
+%macro FILTER_STEP 6-10 "", "", "", 0 ; tmp, reg, mask, shift, dst, \
+ ; src/sub1, sub2, add1, add2, dont_store
+ psrlw %1, %2, %4 ; running sum >> shift = filtered value
+ psubw %1, %6 ; abs->delta
+%ifnidn %7, ""
+ psubw %2, %6 ; slide the sum window for the next row:
+ psubw %2, %7 ; drop the two outgoing pixels,
+ paddw %2, %8
+ paddw %2, %9 ; add the two incoming ones
+%endif
+ pand %1, reg_%3 ; apply mask
+%if %10 == 1
+ paddw %6, %1 ; delta->abs
+%else
+ paddw %1, %6 ; delta->abs
+ mova [%5], %1 ; store filtered pixel to dst
+%endif
+%endmacro
+
+; FIXME avx2 versions for 16_16 and mix2_{4,8}{4,8}
+
+%macro LOOP_FILTER 3 ; dir[h/v], wd[4/8/16], bpp[10/12]
+
+%if ARCH_X86_64
+%if %2 == 16
+%assign %%num_xmm_regs 16
+%elif %2 == 8
+%assign %%num_xmm_regs 15
+%else ; %2 == 4
+%assign %%num_xmm_regs 14
+%endif ; %2
+%assign %%bak_mem 0
+%else ; ARCH_X86_32
+%assign %%num_xmm_regs 8
+%if %2 == 16
+%assign %%bak_mem 7
+%elif %2 == 8
+%assign %%bak_mem 6
+%else ; %2 == 4
+%assign %%bak_mem 5
+%endif ; %2
+%endif ; ARCH_X86_64/32
+
+%if %2 == 16
+%ifidn %1, v
+%assign %%num_gpr_regs 6
+%else ; %1 == h
+%assign %%num_gpr_regs 5
+%endif ; %1
+%assign %%wd_mem 6
+%else ; %2 == 8/4
+%assign %%num_gpr_regs 5
+%if ARCH_X86_32 && %2 == 8
+%assign %%wd_mem 2
+%else ; ARCH_X86_64 || %2 == 4
+%assign %%wd_mem 0
+%endif ; ARCH_X86_64/32 etc.
+%endif ; %2
+
+%ifidn %1, v
+%assign %%tsp_mem 0
+%elif %2 == 16 ; && %1 == h
+%assign %%tsp_mem 16
+%else ; %1 == h && %1 == 8/4
+%assign %%tsp_mem 8
+%endif ; %1/%2
+
+%assign %%off %%wd_mem
+%assign %%tspoff %%bak_mem+%%wd_mem
+%assign %%stack_mem ((%%bak_mem+%%wd_mem+%%tsp_mem)*mmsize)
+
+%if %3 == 10
+%define %%maxsgn 511
+%define %%minsgn m512
+%define %%maxusgn 1023
+%define %%maxf 4
+%else ; %3 == 12
+%define %%maxsgn 2047
+%define %%minsgn m2048
+%define %%maxusgn 4095
+%define %%maxf 16
+%endif ; %3
+
+cglobal vp9_loop_filter_%1_%2_%3, 5, %%num_gpr_regs, %%num_xmm_regs, %%stack_mem, dst, stride, E, I, H
+ ; prepare E, I and H masks
+ shl Ed, %3-8 ; scale the 8-bit thresholds up to the bpp value range
+ shl Id, %3-8
+ shl Hd, %3-8
+%if cpuflag(ssse3)
+ mova m0, [pw_256]
+%endif
+ movd m1, Ed
+ movd m2, Id
+ movd m3, Hd
+%if cpuflag(ssse3)
+ pshufb m1, m0 ; E << (bit_depth - 8)
+ pshufb m2, m0 ; I << (bit_depth - 8)
+ pshufb m3, m0 ; H << (bit_depth - 8)
+%else
+ punpcklwd m1, m1
+ punpcklwd m2, m2
+ punpcklwd m3, m3
+ pshufd m1, m1, q0000
+ pshufd m2, m2, q0000
+ pshufd m3, m3, q0000
+%endif
+ SCRATCH 1, 8, rsp+(%%off+0)*mmsize, E
+ SCRATCH 2, 9, rsp+(%%off+1)*mmsize, I
+ SCRATCH 3, 10, rsp+(%%off+2)*mmsize, H
+%if %2 > 4
+ PRELOAD 11, pw_ %+ %%maxf, F
+%endif
+
+ ; set up variables to load data
+%ifidn %1, v
+ DEFINE_ARGS dst8, stride, stride3, dst0, dst4, dst12
+ lea stride3q, [strideq*3]
+ neg strideq ; negative stride: walk upwards from dst8
+%if %2 == 16
+ lea dst0q, [dst8q+strideq*8]
+%else
+ lea dst4q, [dst8q+strideq*4]
+%endif
+ neg strideq ; back to positive stride
+%if %2 == 16
+ lea dst12q, [dst8q+strideq*4]
+ lea dst4q, [dst0q+strideq*4]
+%endif
+
+%if %2 == 16
+%define %%p7 dst0q
+%define %%p6 dst0q+strideq
+%define %%p5 dst0q+strideq*2
+%define %%p4 dst0q+stride3q
+%endif
+%define %%p3 dst4q
+%define %%p2 dst4q+strideq
+%define %%p1 dst4q+strideq*2
+%define %%p0 dst4q+stride3q
+%define %%q0 dst8q
+%define %%q1 dst8q+strideq
+%define %%q2 dst8q+strideq*2
+%define %%q3 dst8q+stride3q
+%if %2 == 16
+%define %%q4 dst12q
+%define %%q5 dst12q+strideq
+%define %%q6 dst12q+strideq*2
+%define %%q7 dst12q+stride3q
+%endif
+%else ; %1 == h
+ DEFINE_ARGS dst0, stride, stride3, dst4
+ lea stride3q, [strideq*3]
+ lea dst4q, [dst0q+strideq*4]
+
+%define %%p3 rsp+(%%tspoff+0)*mmsize
+%define %%p2 rsp+(%%tspoff+1)*mmsize
+%define %%p1 rsp+(%%tspoff+2)*mmsize
+%define %%p0 rsp+(%%tspoff+3)*mmsize
+%define %%q0 rsp+(%%tspoff+4)*mmsize
+%define %%q1 rsp+(%%tspoff+5)*mmsize
+%define %%q2 rsp+(%%tspoff+6)*mmsize
+%define %%q3 rsp+(%%tspoff+7)*mmsize
+
+%if %2 < 16
+ movu m0, [dst0q+strideq*0-8]
+ movu m1, [dst0q+strideq*1-8]
+ movu m2, [dst0q+strideq*2-8]
+ movu m3, [dst0q+stride3q -8]
+ movu m4, [dst4q+strideq*0-8]
+ movu m5, [dst4q+strideq*1-8]
+ movu m6, [dst4q+strideq*2-8]
+ movu m7, [dst4q+stride3q -8]
+
+%if ARCH_X86_64
+ TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, 12
+%else
+ TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [%%p0], [%%q0]
+%endif
+
+ mova [%%p3], m0
+ mova [%%p2], m1
+ mova [%%p1], m2
+ mova [%%p0], m3
+%if ARCH_X86_64
+ mova [%%q0], m4
+%endif
+ mova [%%q1], m5
+ mova [%%q2], m6
+ mova [%%q3], m7
+
+ ; FIXME investigate if we can _not_ load q0-3 below if h, and adjust register
+ ; order here accordingly
+%else ; %2 == 16
+
+%define %%p7 rsp+(%%tspoff+ 8)*mmsize
+%define %%p6 rsp+(%%tspoff+ 9)*mmsize
+%define %%p5 rsp+(%%tspoff+10)*mmsize
+%define %%p4 rsp+(%%tspoff+11)*mmsize
+%define %%q4 rsp+(%%tspoff+12)*mmsize
+%define %%q5 rsp+(%%tspoff+13)*mmsize
+%define %%q6 rsp+(%%tspoff+14)*mmsize
+%define %%q7 rsp+(%%tspoff+15)*mmsize
+
+ mova m0, [dst0q+strideq*0-16]
+ mova m1, [dst0q+strideq*1-16]
+ mova m2, [dst0q+strideq*2-16]
+ mova m3, [dst0q+stride3q -16]
+ mova m4, [dst4q+strideq*0-16]
+ mova m5, [dst4q+strideq*1-16]
+%if ARCH_X86_64
+ mova m6, [dst4q+strideq*2-16]
+%endif
+ mova m7, [dst4q+stride3q -16]
+
+%if ARCH_X86_64
+ TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, 12
+%else
+ TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [dst4q+strideq*2-16], [%%p3], 1
+%endif
+
+ mova [%%p7], m0
+ mova [%%p6], m1
+ mova [%%p5], m2
+ mova [%%p4], m3
+%if ARCH_X86_64
+ mova [%%p3], m4
+%endif
+ mova [%%p2], m5
+ mova [%%p1], m6
+ mova [%%p0], m7
+
+ mova m0, [dst0q+strideq*0]
+ mova m1, [dst0q+strideq*1]
+ mova m2, [dst0q+strideq*2]
+ mova m3, [dst0q+stride3q ]
+ mova m4, [dst4q+strideq*0]
+ mova m5, [dst4q+strideq*1]
+%if ARCH_X86_64
+ mova m6, [dst4q+strideq*2]
+%endif
+ mova m7, [dst4q+stride3q ]
+
+%if ARCH_X86_64
+ TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, 12
+%else
+ TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [dst4q+strideq*2], [%%q4], 1
+%endif
+
+ mova [%%q0], m0
+ mova [%%q1], m1
+ mova [%%q2], m2
+ mova [%%q3], m3
+%if ARCH_X86_64
+ mova [%%q4], m4
+%endif
+ mova [%%q5], m5
+ mova [%%q6], m6
+ mova [%%q7], m7
+
+ ; FIXME investigate if we can _not_ load q0|q4-7 below if h, and adjust register
+ ; order here accordingly
+%endif ; %2
+%endif ; %1
+
+ ; load q0|q4-7 data
+ mova m0, [%%q0]
+%if %2 == 16
+ mova m4, [%%q4]
+ mova m5, [%%q5]
+ mova m6, [%%q6]
+ mova m7, [%%q7]
+
+ ; flat8out q portion
+ FLAT8OUT_HALF
+ SCRATCH 7, 15, rsp+(%%off+6)*mmsize, F8O
+%endif
+
+ ; load q1-3 data
+ mova m1, [%%q1]
+ mova m2, [%%q2]
+ mova m3, [%%q3]
+
+ ; r6-8|pw_4[m8-11]=reg_E/I/H/F
+ ; r9[m15]=!flatout[q]
+ ; m12-14=free
+ ; m0-3=q0-q3
+ ; m4-7=free
+
+ ; flat8in|fm|hev q portion
+ FLAT8IN_HALF %2
+ SCRATCH 7, 13, rsp+(%%off+4)*mmsize, HEV
+%if %2 > 4
+ SCRATCH 4, 14, rsp+(%%off+5)*mmsize, F8I
+%endif
+
+ ; r6-8|pw_4[m8-11]=reg_E/I/H/F
+ ; r9[m15]=!flat8out[q]
+ ; r10[m13]=hev[q]
+ ; r11[m14]=!flat8in[q]
+ ; m2=!fm[q]
+ ; m0,1=q0-q1
+ ; m2-7=free
+ ; m12=free
+
+ ; load p0-1
+ mova m3, [%%p0]
+ mova m4, [%%p1]
+
+ ; fm mb_edge portion
+ psubw m5, m3, m0 ; q0-p0
+ psubw m6, m4, m1 ; q1-p1
+%if ARCH_X86_64
+ ABS2 m5, m6, m7, m12 ; abs(q0-p0) | abs(q1-p1)
+%else
+ ABS1 m5, m7 ; abs(q0-p0)
+ ABS1 m6, m7 ; abs(q1-p1)
+%endif
+ paddw m5, m5
+ psraw m6, 1
+ paddw m6, m5 ; abs(q0-p0)*2+(abs(q1-p1)>>1)
+ pcmpgtw m6, reg_E
+ por m2, m6
+ SCRATCH 2, 12, rsp+(%%off+3)*mmsize, FM
+
+ ; r6-8|pw_4[m8-11]=reg_E/I/H/F
+ ; r9[m15]=!flat8out[q]
+ ; r10[m13]=hev[q]
+ ; r11[m14]=!flat8in[q]
+ ; r12[m12]=!fm[q]
+ ; m3-4=q0-1
+ ; m0-2/5-7=free
+
+ ; load p4-7 data
+ SWAP 3, 0 ; p0
+ SWAP 4, 1 ; p1
+%if %2 == 16
+ mova m7, [%%p7]
+ mova m6, [%%p6]
+ mova m5, [%%p5]
+ mova m4, [%%p4]
+
+ ; flat8out p portion
+ FLAT8OUT_HALF
+ por m7, reg_F8O ; merge the p side with the saved q side
+ SCRATCH 7, 15, rsp+(%%off+6)*mmsize, F8O
+%endif
+
+ ; r6-8|pw_4[m8-11]=reg_E/I/H/F
+ ; r9[m15]=!flat8out
+ ; r10[m13]=hev[q]
+ ; r11[m14]=!flat8in[q]
+ ; r12[m12]=!fm[q]
+ ; m0=p0
+ ; m1-7=free
+
+ ; load p2-3 data
+ mova m2, [%%p2]
+ mova m3, [%%p3]
+
+ ; flat8in|fm|hev p portion
+ FLAT8IN_HALF %2
+ por m7, reg_HEV
+%if %2 > 4
+ por m4, reg_F8I
+%endif
+ por m2, reg_FM
+%if %2 > 4
+ por m4, m2 ; !flat8|!fm
+%if %2 == 16
+ por m5, m4, reg_F8O ; !flat16|!fm
+ pandn m2, m4 ; filter4_mask
+ pandn m4, m5 ; filter8_mask
+ pxor m5, [pw_m1] ; filter16_mask
+ SCRATCH 5, 15, rsp+(%%off+6)*mmsize, F16M
+%else
+ pandn m2, m4 ; filter4_mask
+ pxor m4, [pw_m1] ; filter8_mask
+%endif
+ SCRATCH 4, 14, rsp+(%%off+5)*mmsize, F8M
+%else
+ pxor m2, [pw_m1] ; filter4_mask
+%endif
+ SCRATCH 7, 13, rsp+(%%off+4)*mmsize, HEV
+ SCRATCH 2, 12, rsp+(%%off+3)*mmsize, F4M
+
+ ; r9[m15]=filter16_mask
+ ; r10[m13]=hev
+ ; r11[m14]=filter8_mask
+ ; r12[m12]=filter4_mask
+ ; m0,1=p0-p1
+ ; m2-7=free
+ ; m8-11=free
+
+%if %2 > 4
+%if %2 == 16
+ ; filter_14
+ mova m2, [%%p7]
+ mova m3, [%%p6]
+ mova m6, [%%p5]
+ mova m7, [%%p4]
+ PRELOAD 8, %%p3, P3
+ PRELOAD 9, %%p2, P2
+%endif
+ PRELOAD 10, %%q0, Q0
+ PRELOAD 11, %%q1, Q1
+%if %2 == 16
+ psllw m4, m2, 3
+ paddw m5, m3, m3
+ paddw m4, m6
+ paddw m5, m7
+ paddw m4, reg_P3
+ paddw m5, reg_P2
+ paddw m4, m1
+ paddw m5, m0
+ paddw m4, reg_Q0 ; q0+p1+p3+p5+p7*8
+ psubw m5, m2 ; p0+p2+p4+p6*2-p7
+ paddw m4, [pw_8]
+ paddw m5, m4 ; q0+p0+p1+p2+p3+p4+p5+p6*2+p7*7+8
+
+ ; below, we use r0-5 for storing pre-filter pixels for subsequent subtraction
+ ; at the end of the filter
+
+ mova [rsp+0*mmsize], m3
+ FILTER_STEP m4, m5, F16M, 4, %%p6, m3, m2, m6, reg_Q1
+%endif
+ mova m3, [%%q2]
+%if %2 == 16
+ mova [rsp+1*mmsize], m6
+ FILTER_STEP m4, m5, F16M, 4, %%p5, m6, m2, m7, m3
+%endif
+ mova m6, [%%q3]
+%if %2 == 16
+ mova [rsp+2*mmsize], m7
+ FILTER_STEP m4, m5, F16M, 4, %%p4, m7, m2, reg_P3, m6
+ mova m7, [%%q4]
+%if ARCH_X86_64
+ mova [rsp+3*mmsize], reg_P3
+%else
+ mova m4, reg_P3
+ mova [rsp+3*mmsize], m4
+%endif
+ FILTER_STEP m4, m5, F16M, 4, %%p3, reg_P3, m2, reg_P2, m7
+ PRELOAD 8, %%q5, Q5
+%if ARCH_X86_64
+ mova [rsp+4*mmsize], reg_P2
+%else
+ mova m4, reg_P2
+ mova [rsp+4*mmsize], m4
+%endif
+ FILTER_STEP m4, m5, F16M, 4, %%p2, reg_P2, m2, m1, reg_Q5
+ PRELOAD 9, %%q6, Q6
+ mova [rsp+5*mmsize], m1
+ FILTER_STEP m4, m5, F16M, 4, %%p1, m1, m2, m0, reg_Q6
+ mova m1, [%%q7]
+ FILTER_STEP m4, m5, F16M, 4, %%p0, m0, m2, reg_Q0, m1, 1
+ FILTER_STEP m4, m5, F16M, 4, %%q0, reg_Q0, [rsp+0*mmsize], reg_Q1, m1, ARCH_X86_64
+ FILTER_STEP m4, m5, F16M, 4, %%q1, reg_Q1, [rsp+1*mmsize], m3, m1, ARCH_X86_64
+ FILTER_STEP m4, m5, F16M, 4, %%q2, m3, [rsp+2*mmsize], m6, m1, 1
+ FILTER_STEP m4, m5, F16M, 4, %%q3, m6, [rsp+3*mmsize], m7, m1
+ FILTER_STEP m4, m5, F16M, 4, %%q4, m7, [rsp+4*mmsize], reg_Q5, m1
+ FILTER_STEP m4, m5, F16M, 4, %%q5, reg_Q5, [rsp+5*mmsize], reg_Q6, m1
+ FILTER_STEP m4, m5, F16M, 4, %%q6, reg_Q6
+
+ mova m7, [%%p1]
+%else
+ SWAP 1, 7
+%endif
+
+ mova m2, [%%p3]
+ mova m1, [%%p2]
+
+ ; reg_Q0-1 (m10-m11)
+ ; m0=p0
+ ; m1=p2
+ ; m2=p3
+ ; m3=q2
+ ; m4-5=free
+ ; m6=q3
+ ; m7=p1
+ ; m8-9 unused
+
+ ; filter_6
+ psllw m4, m2, 2
+ paddw m5, m1, m1
+ paddw m4, m7
+ psubw m5, m2
+ paddw m4, m0
+ paddw m5, reg_Q0
+ paddw m4, [pw_4]
+ paddw m5, m4
+
+%if ARCH_X86_64
+ mova m8, m1
+ mova m9, m7
+%else
+ mova [rsp+0*mmsize], m1
+ mova [rsp+1*mmsize], m7
+%endif
+%ifidn %1, v
+ FILTER_STEP m4, m5, F8M, 3, %%p2, m1, m2, m7, reg_Q1
+%else
+ FILTER_STEP m4, m5, F8M, 3, %%p2, m1, m2, m7, reg_Q1, 1
+%endif
+ FILTER_STEP m4, m5, F8M, 3, %%p1, m7, m2, m0, m3, 1
+ FILTER_STEP m4, m5, F8M, 3, %%p0, m0, m2, reg_Q0, m6, 1
+%if ARCH_X86_64
+ FILTER_STEP m4, m5, F8M, 3, %%q0, reg_Q0, m8, reg_Q1, m6, ARCH_X86_64
+ FILTER_STEP m4, m5, F8M, 3, %%q1, reg_Q1, m9, m3, m6, ARCH_X86_64
+%else
+ FILTER_STEP m4, m5, F8M, 3, %%q0, reg_Q0, [rsp+0*mmsize], reg_Q1, m6, ARCH_X86_64
+ FILTER_STEP m4, m5, F8M, 3, %%q1, reg_Q1, [rsp+1*mmsize], m3, m6, ARCH_X86_64
+%endif
+ FILTER_STEP m4, m5, F8M, 3, %%q2, m3
+
+ UNSCRATCH 2, 10, %%q0
+ UNSCRATCH 6, 11, %%q1
+%else
+ SWAP 1, 7
+ mova m2, [%%q0]
+ mova m6, [%%q1]
+%endif
+ UNSCRATCH 3, 13, rsp+(%%off+4)*mmsize, HEV
+
+ ; m0=p0
+ ; m1=p2
+ ; m2=q0
+ ; m3=hev_mask
+ ; m4-5=free
+ ; m6=q1
+ ; m7=p1
+
+ ; filter_4
+ psubw m4, m7, m6 ; p1-q1
+ psubw m5, m2, m0 ; q0-p0
+ pand m4, m3
+ pminsw m4, [pw_ %+ %%maxsgn]
+ pmaxsw m4, [pw_ %+ %%minsgn] ; clip_intp2(p1-q1, 9) -> f
+ paddw m4, m5
+ paddw m5, m5
+ paddw m4, m5 ; 3*(q0-p0)+f
+ pminsw m4, [pw_ %+ %%maxsgn]
+ pmaxsw m4, [pw_ %+ %%minsgn] ; clip_intp2(3*(q0-p0)+f, 9) -> f
+ pand m4, reg_F4M
+ paddw m5, m4, [pw_4]
+ paddw m4, [pw_3]
+ pminsw m5, [pw_ %+ %%maxsgn]
+ pminsw m4, [pw_ %+ %%maxsgn]
+ psraw m5, 3 ; min_intp2(f+4, 9)>>3 -> f1
+ psraw m4, 3 ; min_intp2(f+3, 9)>>3 -> f2
+ psubw m2, m5 ; q0-f1
+ paddw m0, m4 ; p0+f2
+ pandn m3, m5 ; f1 & !hev (for p1/q1 adj)
+ pxor m4, m4 ; zero = lower clamp bound for pixel values
+ mova m5, [pw_ %+ %%maxusgn] ; upper clamp bound (max pixel value for bpp)
+ pmaxsw m2, m4
+ pmaxsw m0, m4
+ pminsw m2, m5
+ pminsw m0, m5
+%if cpuflag(ssse3)
+ pmulhrsw m3, [pw_16384] ; (f1+1)>>1
+%else
+ paddw m3, [pw_1]
+ psraw m3, 1
+%endif
+ paddw m7, m3 ; p1+f
+ psubw m6, m3 ; q1-f
+ pmaxsw m7, m4
+ pmaxsw m6, m4
+ pminsw m7, m5
+ pminsw m6, m5
+
+ ; store
+%ifidn %1, v
+ mova [%%p1], m7
+ mova [%%p0], m0
+ mova [%%q0], m2
+ mova [%%q1], m6
+%else ; %1 == h
+%if %2 == 4
+ TRANSPOSE4x4W 7, 0, 2, 6, 1
+ movh [dst0q+strideq*0-4], m7
+ movhps [dst0q+strideq*1-4], m7
+ movh [dst0q+strideq*2-4], m0
+ movhps [dst0q+stride3q -4], m0
+ movh [dst4q+strideq*0-4], m2
+ movhps [dst4q+strideq*1-4], m2
+ movh [dst4q+strideq*2-4], m6
+ movhps [dst4q+stride3q -4], m6
+%elif %2 == 8
+ mova m3, [%%p3]
+ mova m4, [%%q2]
+ mova m5, [%%q3]
+
+%if ARCH_X86_64
+ TRANSPOSE8x8W 3, 1, 7, 0, 2, 6, 4, 5, 8
+%else
+ TRANSPOSE8x8W 3, 1, 7, 0, 2, 6, 4, 5, [%%q2], [%%q0], 1
+ mova m2, [%%q0]
+%endif
+
+ movu [dst0q+strideq*0-8], m3
+ movu [dst0q+strideq*1-8], m1
+ movu [dst0q+strideq*2-8], m7
+ movu [dst0q+stride3q -8], m0
+ movu [dst4q+strideq*0-8], m2
+ movu [dst4q+strideq*1-8], m6
+ movu [dst4q+strideq*2-8], m4
+ movu [dst4q+stride3q -8], m5
+%else ; %2 == 16
+ SCRATCH 2, 8, %%q0
+ SCRATCH 6, 9, %%q1
+ mova m2, [%%p7]
+ mova m3, [%%p6]
+ mova m4, [%%p5]
+ mova m5, [%%p4]
+ mova m6, [%%p3]
+
+%if ARCH_X86_64
+ TRANSPOSE8x8W 2, 3, 4, 5, 6, 1, 7, 0, 10
+%else
+ mova [%%p1], m7
+ TRANSPOSE8x8W 2, 3, 4, 5, 6, 1, 7, 0, [%%p1], [dst4q+strideq*0-16], 1
+%endif
+
+ mova [dst0q+strideq*0-16], m2
+ mova [dst0q+strideq*1-16], m3
+ mova [dst0q+strideq*2-16], m4
+ mova [dst0q+stride3q -16], m5
+%if ARCH_X86_64
+ mova [dst4q+strideq*0-16], m6
+%endif
+ mova [dst4q+strideq*1-16], m1
+ mova [dst4q+strideq*2-16], m7
+ mova [dst4q+stride3q -16], m0
+
+ UNSCRATCH 2, 8, %%q0
+ UNSCRATCH 6, 9, %%q1
+ mova m0, [%%q2]
+ mova m1, [%%q3]
+ mova m3, [%%q4]
+ mova m4, [%%q5]
+%if ARCH_X86_64
+ mova m5, [%%q6]
+%endif
+ mova m7, [%%q7]
+
+%if ARCH_X86_64
+ TRANSPOSE8x8W 2, 6, 0, 1, 3, 4, 5, 7, 8
+%else
+ TRANSPOSE8x8W 2, 6, 0, 1, 3, 4, 5, 7, [%%q6], [dst4q+strideq*0], 1
+%endif
+
+ mova [dst0q+strideq*0], m2
+ mova [dst0q+strideq*1], m6
+ mova [dst0q+strideq*2], m0
+ mova [dst0q+stride3q ], m1
+%if ARCH_X86_64
+ mova [dst4q+strideq*0], m3
+%endif
+ mova [dst4q+strideq*1], m4
+ mova [dst4q+strideq*2], m5
+ mova [dst4q+stride3q ], m7
+%endif ; %2
+%endif ; %1
+ RET
+%endmacro
+
+%macro LOOP_FILTER_CPUSETS 3 ; dir, wd, bpp: instantiate for each SIMD instruction set
+INIT_XMM sse2
+LOOP_FILTER %1, %2, %3
+INIT_XMM ssse3
+LOOP_FILTER %1, %2, %3
+INIT_XMM avx
+LOOP_FILTER %1, %2, %3
+%endmacro
+
+%macro LOOP_FILTER_WDSETS 2 ; dir, bpp: instantiate for each filter width
+LOOP_FILTER_CPUSETS %1, 4, %2
+LOOP_FILTER_CPUSETS %1, 8, %2
+LOOP_FILTER_CPUSETS %1, 16, %2
+%endmacro
+
+LOOP_FILTER_WDSETS h, 10 ; emit all dir/width/cpuset combinations for 10 and 12 bpp
+LOOP_FILTER_WDSETS v, 10
+LOOP_FILTER_WDSETS h, 12
+LOOP_FILTER_WDSETS v, 12
diff --git a/media/ffvpx/libavcodec/x86/vp9mc.asm b/media/ffvpx/libavcodec/x86/vp9mc.asm
new file mode 100644
index 000000000..9152ba541
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp9mc.asm
@@ -0,0 +1,676 @@
+;******************************************************************************
+;* VP9 MC SIMD optimizations
+;*
+;* Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA 32
+
+cextern pw_256
+cextern pw_64
+
+%macro F8_SSSE3_TAPS 8
+times 16 db %1, %2
+times 16 db %3, %4
+times 16 db %5, %6
+times 16 db %7, %8
+%endmacro
+
+%macro F8_SSE2_TAPS 8
+times 8 dw %1
+times 8 dw %2
+times 8 dw %3
+times 8 dw %4
+times 8 dw %5
+times 8 dw %6
+times 8 dw %7
+times 8 dw %8
+%endmacro
+
+%macro F8_16BPP_TAPS 8
+times 8 dw %1, %2
+times 8 dw %3, %4
+times 8 dw %5, %6
+times 8 dw %7, %8
+%endmacro
+
+%macro FILTER 1
+const filters_%1 ; smooth
+ F8_TAPS -3, -1, 32, 64, 38, 1, -3, 0
+ F8_TAPS -2, -2, 29, 63, 41, 2, -3, 0
+ F8_TAPS -2, -2, 26, 63, 43, 4, -4, 0
+ F8_TAPS -2, -3, 24, 62, 46, 5, -4, 0
+ F8_TAPS -2, -3, 21, 60, 49, 7, -4, 0
+ F8_TAPS -1, -4, 18, 59, 51, 9, -4, 0
+ F8_TAPS -1, -4, 16, 57, 53, 12, -4, -1
+ F8_TAPS -1, -4, 14, 55, 55, 14, -4, -1
+ F8_TAPS -1, -4, 12, 53, 57, 16, -4, -1
+ F8_TAPS 0, -4, 9, 51, 59, 18, -4, -1
+ F8_TAPS 0, -4, 7, 49, 60, 21, -3, -2
+ F8_TAPS 0, -4, 5, 46, 62, 24, -3, -2
+ F8_TAPS 0, -4, 4, 43, 63, 26, -2, -2
+ F8_TAPS 0, -3, 2, 41, 63, 29, -2, -2
+ F8_TAPS 0, -3, 1, 38, 64, 32, -1, -3
+ ; regular
+ F8_TAPS 0, 1, -5, 126, 8, -3, 1, 0
+ F8_TAPS -1, 3, -10, 122, 18, -6, 2, 0
+ F8_TAPS -1, 4, -13, 118, 27, -9, 3, -1
+ F8_TAPS -1, 4, -16, 112, 37, -11, 4, -1
+ F8_TAPS -1, 5, -18, 105, 48, -14, 4, -1
+ F8_TAPS -1, 5, -19, 97, 58, -16, 5, -1
+ F8_TAPS -1, 6, -19, 88, 68, -18, 5, -1
+ F8_TAPS -1, 6, -19, 78, 78, -19, 6, -1
+ F8_TAPS -1, 5, -18, 68, 88, -19, 6, -1
+ F8_TAPS -1, 5, -16, 58, 97, -19, 5, -1
+ F8_TAPS -1, 4, -14, 48, 105, -18, 5, -1
+ F8_TAPS -1, 4, -11, 37, 112, -16, 4, -1
+ F8_TAPS -1, 3, -9, 27, 118, -13, 4, -1
+ F8_TAPS 0, 2, -6, 18, 122, -10, 3, -1
+ F8_TAPS 0, 1, -3, 8, 126, -5, 1, 0
+ ; sharp
+ F8_TAPS -1, 3, -7, 127, 8, -3, 1, 0
+ F8_TAPS -2, 5, -13, 125, 17, -6, 3, -1
+ F8_TAPS -3, 7, -17, 121, 27, -10, 5, -2
+ F8_TAPS -4, 9, -20, 115, 37, -13, 6, -2
+ F8_TAPS -4, 10, -23, 108, 48, -16, 8, -3
+ F8_TAPS -4, 10, -24, 100, 59, -19, 9, -3
+ F8_TAPS -4, 11, -24, 90, 70, -21, 10, -4
+ F8_TAPS -4, 11, -23, 80, 80, -23, 11, -4
+ F8_TAPS -4, 10, -21, 70, 90, -24, 11, -4
+ F8_TAPS -3, 9, -19, 59, 100, -24, 10, -4
+ F8_TAPS -3, 8, -16, 48, 108, -23, 10, -4
+ F8_TAPS -2, 6, -13, 37, 115, -20, 9, -4
+ F8_TAPS -2, 5, -10, 27, 121, -17, 7, -3
+ F8_TAPS -1, 3, -6, 17, 125, -13, 5, -2
+ F8_TAPS 0, 1, -3, 8, 127, -7, 3, -1
+%endmacro
+
+%define F8_TAPS F8_SSSE3_TAPS
+; int8_t ff_filters_ssse3[3][15][4][32]
+FILTER ssse3
+%define F8_TAPS F8_SSE2_TAPS
+; int16_t ff_filters_sse2[3][15][8][8]
+FILTER sse2
+%define F8_TAPS F8_16BPP_TAPS
+; int16_t ff_filters_16bpp[3][15][4][16]
+FILTER 16bpp
+
+SECTION .text
+
+%macro filter_sse2_h_fn 1
+%assign %%px mmsize/2
+cglobal vp9_%1_8tap_1d_h_ %+ %%px %+ _8, 6, 6, 15, dst, dstride, src, sstride, h, filtery
+ pxor m5, m5
+ mova m6, [pw_64]
+ mova m7, [filteryq+ 0]
+%if ARCH_X86_64 && mmsize > 8
+ mova m8, [filteryq+ 16]
+ mova m9, [filteryq+ 32]
+ mova m10, [filteryq+ 48]
+ mova m11, [filteryq+ 64]
+ mova m12, [filteryq+ 80]
+ mova m13, [filteryq+ 96]
+ mova m14, [filteryq+112]
+%endif
+.loop:
+ movh m0, [srcq-3]
+ movh m1, [srcq-2]
+ movh m2, [srcq-1]
+ movh m3, [srcq+0]
+ movh m4, [srcq+1]
+ punpcklbw m0, m5
+ punpcklbw m1, m5
+ punpcklbw m2, m5
+ punpcklbw m3, m5
+ punpcklbw m4, m5
+ pmullw m0, m7
+%if ARCH_X86_64 && mmsize > 8
+ pmullw m1, m8
+ pmullw m2, m9
+ pmullw m3, m10
+ pmullw m4, m11
+%else
+ pmullw m1, [filteryq+ 16]
+ pmullw m2, [filteryq+ 32]
+ pmullw m3, [filteryq+ 48]
+ pmullw m4, [filteryq+ 64]
+%endif
+ paddw m0, m1
+ paddw m2, m3
+ paddw m0, m4
+ movh m1, [srcq+2]
+ movh m3, [srcq+3]
+ movh m4, [srcq+4]
+ add srcq, sstrideq
+ punpcklbw m1, m5
+ punpcklbw m3, m5
+ punpcklbw m4, m5
+%if ARCH_X86_64 && mmsize > 8
+ pmullw m1, m12
+ pmullw m3, m13
+ pmullw m4, m14
+%else
+ pmullw m1, [filteryq+ 80]
+ pmullw m3, [filteryq+ 96]
+ pmullw m4, [filteryq+112]
+%endif
+ paddw m0, m1
+ paddw m3, m4
+ paddw m0, m6
+ paddw m2, m3
+ paddsw m0, m2
+ psraw m0, 7
+%ifidn %1, avg
+ movh m1, [dstq]
+%endif
+ packuswb m0, m0
+%ifidn %1, avg
+ pavgb m0, m1
+%endif
+ movh [dstq], m0
+ add dstq, dstrideq
+ dec hd
+ jg .loop
+ RET
+%endmacro
+
+INIT_MMX mmxext
+filter_sse2_h_fn put
+filter_sse2_h_fn avg
+
+INIT_XMM sse2
+filter_sse2_h_fn put
+filter_sse2_h_fn avg
+
+%macro filter_h_fn 1
+%assign %%px mmsize/2
+cglobal vp9_%1_8tap_1d_h_ %+ %%px %+ _8, 6, 6, 11, dst, dstride, src, sstride, h, filtery
+ mova m6, [pw_256]
+ mova m7, [filteryq+ 0]
+%if ARCH_X86_64 && mmsize > 8
+ mova m8, [filteryq+32]
+ mova m9, [filteryq+64]
+ mova m10, [filteryq+96]
+%endif
+.loop:
+ movh m0, [srcq-3]
+ movh m1, [srcq-2]
+ movh m2, [srcq-1]
+ movh m3, [srcq+0]
+ movh m4, [srcq+1]
+ movh m5, [srcq+2]
+ punpcklbw m0, m1
+ punpcklbw m2, m3
+ movh m1, [srcq+3]
+ movh m3, [srcq+4]
+ add srcq, sstrideq
+ punpcklbw m4, m5
+ punpcklbw m1, m3
+ pmaddubsw m0, m7
+%if ARCH_X86_64 && mmsize > 8
+ pmaddubsw m2, m8
+ pmaddubsw m4, m9
+ pmaddubsw m1, m10
+%else
+ pmaddubsw m2, [filteryq+32]
+ pmaddubsw m4, [filteryq+64]
+ pmaddubsw m1, [filteryq+96]
+%endif
+ paddw m0, m4
+ paddw m2, m1
+ paddsw m0, m2
+ pmulhrsw m0, m6
+%ifidn %1, avg
+ movh m1, [dstq]
+%endif
+ packuswb m0, m0
+%ifidn %1, avg
+ pavgb m0, m1
+%endif
+ movh [dstq], m0
+ add dstq, dstrideq
+ dec hd
+ jg .loop
+ RET
+%endmacro
+
+INIT_MMX ssse3
+filter_h_fn put
+filter_h_fn avg
+
+INIT_XMM ssse3
+filter_h_fn put
+filter_h_fn avg
+
+%if ARCH_X86_64
+%macro filter_hx2_fn 1
+%assign %%px mmsize
+cglobal vp9_%1_8tap_1d_h_ %+ %%px %+ _8, 6, 6, 14, dst, dstride, src, sstride, h, filtery
+ mova m13, [pw_256]
+ mova m8, [filteryq+ 0]
+ mova m9, [filteryq+32]
+ mova m10, [filteryq+64]
+ mova m11, [filteryq+96]
+.loop:
+ movu m0, [srcq-3]
+ movu m1, [srcq-2]
+ movu m2, [srcq-1]
+ movu m3, [srcq+0]
+ movu m4, [srcq+1]
+ movu m5, [srcq+2]
+ movu m6, [srcq+3]
+ movu m7, [srcq+4]
+ add srcq, sstrideq
+ SBUTTERFLY bw, 0, 1, 12
+ SBUTTERFLY bw, 2, 3, 12
+ SBUTTERFLY bw, 4, 5, 12
+ SBUTTERFLY bw, 6, 7, 12
+ pmaddubsw m0, m8
+ pmaddubsw m1, m8
+ pmaddubsw m2, m9
+ pmaddubsw m3, m9
+ pmaddubsw m4, m10
+ pmaddubsw m5, m10
+ pmaddubsw m6, m11
+ pmaddubsw m7, m11
+ paddw m0, m4
+ paddw m1, m5
+ paddw m2, m6
+ paddw m3, m7
+ paddsw m0, m2
+ paddsw m1, m3
+ pmulhrsw m0, m13
+ pmulhrsw m1, m13
+ packuswb m0, m1
+%ifidn %1, avg
+ pavgb m0, [dstq]
+%endif
+ mova [dstq], m0
+ add dstq, dstrideq
+ dec hd
+ jg .loop
+ RET
+%endmacro
+
+INIT_XMM ssse3
+filter_hx2_fn put
+filter_hx2_fn avg
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+filter_hx2_fn put
+filter_hx2_fn avg
+%endif
+
+%endif ; ARCH_X86_64
+
+%macro filter_sse2_v_fn 1
+%assign %%px mmsize/2
+%if ARCH_X86_64
+cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _8, 6, 8, 15, dst, dstride, src, sstride, h, filtery, src4, sstride3
+%else
+cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _8, 4, 7, 15, dst, dstride, src, sstride, filtery, src4, sstride3
+ mov filteryq, r5mp
+%define hd r4mp
+%endif
+ pxor m5, m5
+ mova m6, [pw_64]
+ lea sstride3q, [sstrideq*3]
+ lea src4q, [srcq+sstrideq]
+ sub srcq, sstride3q
+ mova m7, [filteryq+ 0]
+%if ARCH_X86_64 && mmsize > 8
+ mova m8, [filteryq+ 16]
+ mova m9, [filteryq+ 32]
+ mova m10, [filteryq+ 48]
+ mova m11, [filteryq+ 64]
+ mova m12, [filteryq+ 80]
+ mova m13, [filteryq+ 96]
+ mova m14, [filteryq+112]
+%endif
+.loop:
+ ; FIXME maybe reuse loads from previous rows, or just
+ ; more generally unroll this to prevent multiple loads of
+ ; the same data?
+ movh m0, [srcq]
+ movh m1, [srcq+sstrideq]
+ movh m2, [srcq+sstrideq*2]
+ movh m3, [srcq+sstride3q]
+ add srcq, sstrideq
+ movh m4, [src4q]
+ punpcklbw m0, m5
+ punpcklbw m1, m5
+ punpcklbw m2, m5
+ punpcklbw m3, m5
+ punpcklbw m4, m5
+ pmullw m0, m7
+%if ARCH_X86_64 && mmsize > 8
+ pmullw m1, m8
+ pmullw m2, m9
+ pmullw m3, m10
+ pmullw m4, m11
+%else
+ pmullw m1, [filteryq+ 16]
+ pmullw m2, [filteryq+ 32]
+ pmullw m3, [filteryq+ 48]
+ pmullw m4, [filteryq+ 64]
+%endif
+ paddw m0, m1
+ paddw m2, m3
+ paddw m0, m4
+ movh m1, [src4q+sstrideq]
+ movh m3, [src4q+sstrideq*2]
+ movh m4, [src4q+sstride3q]
+ add src4q, sstrideq
+ punpcklbw m1, m5
+ punpcklbw m3, m5
+ punpcklbw m4, m5
+%if ARCH_X86_64 && mmsize > 8
+ pmullw m1, m12
+ pmullw m3, m13
+ pmullw m4, m14
+%else
+ pmullw m1, [filteryq+ 80]
+ pmullw m3, [filteryq+ 96]
+ pmullw m4, [filteryq+112]
+%endif
+ paddw m0, m1
+ paddw m3, m4
+ paddw m0, m6
+ paddw m2, m3
+ paddsw m0, m2
+ psraw m0, 7
+%ifidn %1, avg
+ movh m1, [dstq]
+%endif
+ packuswb m0, m0
+%ifidn %1, avg
+ pavgb m0, m1
+%endif
+ movh [dstq], m0
+ add dstq, dstrideq
+ dec hd
+ jg .loop
+ RET
+%endmacro
+
+INIT_MMX mmxext
+filter_sse2_v_fn put
+filter_sse2_v_fn avg
+
+INIT_XMM sse2
+filter_sse2_v_fn put
+filter_sse2_v_fn avg
+
+%macro filter_v_fn 1
+%assign %%px mmsize/2
+%if ARCH_X86_64
+cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _8, 6, 8, 11, dst, dstride, src, sstride, h, filtery, src4, sstride3
+%else
+cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _8, 4, 7, 11, dst, dstride, src, sstride, filtery, src4, sstride3
+ mov filteryq, r5mp
+%define hd r4mp
+%endif
+ mova m6, [pw_256]
+ lea sstride3q, [sstrideq*3]
+ lea src4q, [srcq+sstrideq]
+ sub srcq, sstride3q
+ mova m7, [filteryq+ 0]
+%if ARCH_X86_64 && mmsize > 8
+ mova m8, [filteryq+32]
+ mova m9, [filteryq+64]
+ mova m10, [filteryq+96]
+%endif
+.loop:
+ ; FIXME maybe reuse loads from previous rows, or just
+ ; more generally unroll this to prevent multiple loads of
+ ; the same data?
+ movh m0, [srcq]
+ movh m1, [srcq+sstrideq]
+ movh m2, [srcq+sstrideq*2]
+ movh m3, [srcq+sstride3q]
+ movh m4, [src4q]
+ movh m5, [src4q+sstrideq]
+ punpcklbw m0, m1
+ punpcklbw m2, m3
+ movh m1, [src4q+sstrideq*2]
+ movh m3, [src4q+sstride3q]
+ add srcq, sstrideq
+ add src4q, sstrideq
+ punpcklbw m4, m5
+ punpcklbw m1, m3
+ pmaddubsw m0, m7
+%if ARCH_X86_64 && mmsize > 8
+ pmaddubsw m2, m8
+ pmaddubsw m4, m9
+ pmaddubsw m1, m10
+%else
+ pmaddubsw m2, [filteryq+32]
+ pmaddubsw m4, [filteryq+64]
+ pmaddubsw m1, [filteryq+96]
+%endif
+ paddw m0, m4
+ paddw m2, m1
+ paddsw m0, m2
+ pmulhrsw m0, m6
+%ifidn %1, avg
+ movh m1, [dstq]
+%endif
+ packuswb m0, m0
+%ifidn %1, avg
+ pavgb m0, m1
+%endif
+ movh [dstq], m0
+ add dstq, dstrideq
+ dec hd
+ jg .loop
+ RET
+%endmacro
+
+INIT_MMX ssse3
+filter_v_fn put
+filter_v_fn avg
+
+INIT_XMM ssse3
+filter_v_fn put
+filter_v_fn avg
+
+%if ARCH_X86_64
+
+%macro filter_vx2_fn 1
+%assign %%px mmsize
+cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _8, 6, 8, 14, dst, dstride, src, sstride, h, filtery, src4, sstride3
+ mova m13, [pw_256]
+ lea sstride3q, [sstrideq*3]
+ lea src4q, [srcq+sstrideq]
+ sub srcq, sstride3q
+ mova m8, [filteryq+ 0]
+ mova m9, [filteryq+32]
+ mova m10, [filteryq+64]
+ mova m11, [filteryq+96]
+.loop:
+ ; FIXME maybe reuse loads from previous rows, or just
+ ; more generally unroll this to prevent multiple loads of
+ ; the same data?
+ movu m0, [srcq]
+ movu m1, [srcq+sstrideq]
+ movu m2, [srcq+sstrideq*2]
+ movu m3, [srcq+sstride3q]
+ movu m4, [src4q]
+ movu m5, [src4q+sstrideq]
+ movu m6, [src4q+sstrideq*2]
+ movu m7, [src4q+sstride3q]
+ add srcq, sstrideq
+ add src4q, sstrideq
+ SBUTTERFLY bw, 0, 1, 12
+ SBUTTERFLY bw, 2, 3, 12
+ SBUTTERFLY bw, 4, 5, 12
+ SBUTTERFLY bw, 6, 7, 12
+ pmaddubsw m0, m8
+ pmaddubsw m1, m8
+ pmaddubsw m2, m9
+ pmaddubsw m3, m9
+ pmaddubsw m4, m10
+ pmaddubsw m5, m10
+ pmaddubsw m6, m11
+ pmaddubsw m7, m11
+ paddw m0, m4
+ paddw m1, m5
+ paddw m2, m6
+ paddw m3, m7
+ paddsw m0, m2
+ paddsw m1, m3
+ pmulhrsw m0, m13
+ pmulhrsw m1, m13
+ packuswb m0, m1
+%ifidn %1, avg
+ pavgb m0, [dstq]
+%endif
+ mova [dstq], m0
+ add dstq, dstrideq
+ dec hd
+ jg .loop
+ RET
+%endmacro
+
+INIT_XMM ssse3
+filter_vx2_fn put
+filter_vx2_fn avg
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+filter_vx2_fn put
+filter_vx2_fn avg
+%endif
+
+%endif ; ARCH_X86_64
+
+%macro fpel_fn 6-8 0, 4
+%if %2 == 4
+%define %%srcfn movh
+%define %%dstfn movh
+%else
+%define %%srcfn movu
+%define %%dstfn mova
+%endif
+
+%if %7 == 8
+%define %%pavg pavgb
+%define %%szsuf _8
+%elif %7 == 16
+%define %%pavg pavgw
+%define %%szsuf _16
+%else
+%define %%szsuf
+%endif
+
+%if %2 <= mmsize
+cglobal vp9_%1%2 %+ %%szsuf, 5, 7, 4, dst, dstride, src, sstride, h, dstride3, sstride3
+ lea sstride3q, [sstrideq*3]
+ lea dstride3q, [dstrideq*3]
+%else
+cglobal vp9_%1%2 %+ %%szsuf, 5, 5, %8, dst, dstride, src, sstride, h
+%endif
+.loop:
+ %%srcfn m0, [srcq]
+ %%srcfn m1, [srcq+s%3]
+ %%srcfn m2, [srcq+s%4]
+ %%srcfn m3, [srcq+s%5]
+%if %2/mmsize == 8
+ %%srcfn m4, [srcq+mmsize*4]
+ %%srcfn m5, [srcq+mmsize*5]
+ %%srcfn m6, [srcq+mmsize*6]
+ %%srcfn m7, [srcq+mmsize*7]
+%endif
+ lea srcq, [srcq+sstrideq*%6]
+%ifidn %1, avg
+ %%pavg m0, [dstq]
+ %%pavg m1, [dstq+d%3]
+ %%pavg m2, [dstq+d%4]
+ %%pavg m3, [dstq+d%5]
+%if %2/mmsize == 8
+ %%pavg m4, [dstq+mmsize*4]
+ %%pavg m5, [dstq+mmsize*5]
+ %%pavg m6, [dstq+mmsize*6]
+ %%pavg m7, [dstq+mmsize*7]
+%endif
+%endif
+ %%dstfn [dstq], m0
+ %%dstfn [dstq+d%3], m1
+ %%dstfn [dstq+d%4], m2
+ %%dstfn [dstq+d%5], m3
+%if %2/mmsize == 8
+ %%dstfn [dstq+mmsize*4], m4
+ %%dstfn [dstq+mmsize*5], m5
+ %%dstfn [dstq+mmsize*6], m6
+ %%dstfn [dstq+mmsize*7], m7
+%endif
+ lea dstq, [dstq+dstrideq*%6]
+ sub hd, %6
+ jnz .loop
+ RET
+%endmacro
+
+%define d16 16
+%define s16 16
+%define d32 32
+%define s32 32
+INIT_MMX mmx
+fpel_fn put, 4, strideq, strideq*2, stride3q, 4
+fpel_fn put, 8, strideq, strideq*2, stride3q, 4
+INIT_MMX mmxext
+fpel_fn avg, 4, strideq, strideq*2, stride3q, 4, 8
+fpel_fn avg, 8, strideq, strideq*2, stride3q, 4, 8
+INIT_XMM sse
+fpel_fn put, 16, strideq, strideq*2, stride3q, 4
+fpel_fn put, 32, mmsize, strideq, strideq+mmsize, 2
+fpel_fn put, 64, mmsize, mmsize*2, mmsize*3, 1
+fpel_fn put, 128, mmsize, mmsize*2, mmsize*3, 1, 0, 8
+INIT_XMM sse2
+fpel_fn avg, 16, strideq, strideq*2, stride3q, 4, 8
+fpel_fn avg, 32, mmsize, strideq, strideq+mmsize, 2, 8
+fpel_fn avg, 64, mmsize, mmsize*2, mmsize*3, 1, 8
+INIT_YMM avx
+fpel_fn put, 32, strideq, strideq*2, stride3q, 4
+fpel_fn put, 64, mmsize, strideq, strideq+mmsize, 2
+fpel_fn put, 128, mmsize, mmsize*2, mmsize*3, 1
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+fpel_fn avg, 32, strideq, strideq*2, stride3q, 4, 8
+fpel_fn avg, 64, mmsize, strideq, strideq+mmsize, 2, 8
+%endif
+INIT_MMX mmxext
+fpel_fn avg, 8, strideq, strideq*2, stride3q, 4, 16
+INIT_XMM sse2
+fpel_fn avg, 16, strideq, strideq*2, stride3q, 4, 16
+fpel_fn avg, 32, mmsize, strideq, strideq+mmsize, 2, 16
+fpel_fn avg, 64, mmsize, mmsize*2, mmsize*3, 1, 16
+fpel_fn avg, 128, mmsize, mmsize*2, mmsize*3, 1, 16, 8
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+fpel_fn avg, 32, strideq, strideq*2, stride3q, 4, 16
+fpel_fn avg, 64, mmsize, strideq, strideq+mmsize, 2, 16
+fpel_fn avg, 128, mmsize, mmsize*2, mmsize*3, 1, 16
+%endif
+%undef s16
+%undef d16
+%undef s32
+%undef d32
diff --git a/media/ffvpx/libavcodec/x86/vp9mc_16bpp.asm b/media/ffvpx/libavcodec/x86/vp9mc_16bpp.asm
new file mode 100644
index 000000000..9a462eaf8
--- /dev/null
+++ b/media/ffvpx/libavcodec/x86/vp9mc_16bpp.asm
@@ -0,0 +1,431 @@
+;******************************************************************************
+;* VP9 MC SIMD optimizations
+;*
+;* Copyright (c) 2015 Ronald S. Bultje <rsbultje gmail com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA 32
+
+pd_64: times 8 dd 64
+
+cextern pw_1023
+cextern pw_4095
+
+SECTION .text
+
+%macro filter_h4_fn 1-2 12
+cglobal vp9_%1_8tap_1d_h_4_10, 6, 6, %2, dst, dstride, src, sstride, h, filtery
+ mova m5, [pw_1023]
+.body:
+%if notcpuflag(sse4) && ARCH_X86_64
+ pxor m11, m11
+%endif
+ mova m6, [pd_64]
+ mova m7, [filteryq+ 0]
+%if ARCH_X86_64 && mmsize > 8
+ mova m8, [filteryq+32]
+ mova m9, [filteryq+64]
+ mova m10, [filteryq+96]
+%endif
+.loop:
+ movh m0, [srcq-6]
+ movh m1, [srcq-4]
+ movh m2, [srcq-2]
+ movh m3, [srcq+0]
+ movh m4, [srcq+2]
+ punpcklwd m0, m1
+ punpcklwd m2, m3
+ pmaddwd m0, m7
+%if ARCH_X86_64 && mmsize > 8
+ pmaddwd m2, m8
+%else
+ pmaddwd m2, [filteryq+32]
+%endif
+ movu m1, [srcq+4]
+ movu m3, [srcq+6]
+ paddd m0, m2
+ movu m2, [srcq+8]
+ add srcq, sstrideq
+ punpcklwd m4, m1
+ punpcklwd m3, m2
+%if ARCH_X86_64 && mmsize > 8
+ pmaddwd m4, m9
+ pmaddwd m3, m10
+%else
+ pmaddwd m4, [filteryq+64]
+ pmaddwd m3, [filteryq+96]
+%endif
+ paddd m0, m4
+ paddd m0, m3
+ paddd m0, m6
+ psrad m0, 7
+%if cpuflag(sse4)
+ packusdw m0, m0
+%else
+ packssdw m0, m0
+%endif
+%ifidn %1, avg
+ movh m1, [dstq]
+%endif
+ pminsw m0, m5
+%if notcpuflag(sse4)
+%if ARCH_X86_64
+ pmaxsw m0, m11
+%else
+ pxor m2, m2
+ pmaxsw m0, m2
+%endif
+%endif
+%ifidn %1, avg
+ pavgw m0, m1
+%endif
+ movh [dstq], m0
+ add dstq, dstrideq
+ dec hd
+ jg .loop
+ RET
+
+cglobal vp9_%1_8tap_1d_h_4_12, 6, 6, %2, dst, dstride, src, sstride, h, filtery
+ mova m5, [pw_4095]
+ jmp mangle(private_prefix %+ _ %+ vp9_%1_8tap_1d_h_4_10 %+ SUFFIX).body
+%endmacro
+
+INIT_XMM sse2
+filter_h4_fn put
+filter_h4_fn avg
+
+%macro filter_h_fn 1-2 12
+%assign %%px mmsize/2
+cglobal vp9_%1_8tap_1d_h_ %+ %%px %+ _10, 6, 6, %2, dst, dstride, src, sstride, h, filtery
+ mova m5, [pw_1023]
+.body:
+%if notcpuflag(sse4) && ARCH_X86_64
+ pxor m11, m11
+%endif
+ mova m6, [pd_64]
+ mova m7, [filteryq+ 0]
+%if ARCH_X86_64 && mmsize > 8
+ mova m8, [filteryq+32]
+ mova m9, [filteryq+64]
+ mova m10, [filteryq+96]
+%endif
+.loop:
+ movu m0, [srcq-6]
+ movu m1, [srcq-4]
+ movu m2, [srcq-2]
+ movu m3, [srcq+0]
+ movu m4, [srcq+2]
+ pmaddwd m0, m7
+ pmaddwd m1, m7
+%if ARCH_X86_64 && mmsize > 8
+ pmaddwd m2, m8
+ pmaddwd m3, m8
+ pmaddwd m4, m9
+%else
+ pmaddwd m2, [filteryq+32]
+ pmaddwd m3, [filteryq+32]
+ pmaddwd m4, [filteryq+64]
+%endif
+ paddd m0, m2
+ paddd m1, m3
+ paddd m0, m4
+ movu m2, [srcq+4]
+ movu m3, [srcq+6]
+ movu m4, [srcq+8]
+ add srcq, sstrideq
+%if ARCH_X86_64 && mmsize > 8
+ pmaddwd m2, m9
+ pmaddwd m3, m10
+ pmaddwd m4, m10
+%else
+ pmaddwd m2, [filteryq+64]
+ pmaddwd m3, [filteryq+96]
+ pmaddwd m4, [filteryq+96]
+%endif
+ paddd m1, m2
+ paddd m0, m3
+ paddd m1, m4
+ paddd m0, m6
+ paddd m1, m6
+ psrad m0, 7
+ psrad m1, 7
+%if cpuflag(sse4)
+ packusdw m0, m0
+ packusdw m1, m1
+%else
+ packssdw m0, m0
+ packssdw m1, m1
+%endif
+ punpcklwd m0, m1
+ pminsw m0, m5
+%if notcpuflag(sse4)
+%if ARCH_X86_64
+ pmaxsw m0, m11
+%else
+ pxor m2, m2
+ pmaxsw m0, m2
+%endif
+%endif
+%ifidn %1, avg
+ pavgw m0, [dstq]
+%endif
+ mova [dstq], m0
+ add dstq, dstrideq
+ dec hd
+ jg .loop
+ RET
+
+cglobal vp9_%1_8tap_1d_h_ %+ %%px %+ _12, 6, 6, %2, dst, dstride, src, sstride, h, filtery
+ mova m5, [pw_4095]
+ jmp mangle(private_prefix %+ _ %+ vp9_%1_8tap_1d_h_ %+ %%px %+ _10 %+ SUFFIX).body
+%endmacro
+
+INIT_XMM sse2
+filter_h_fn put
+filter_h_fn avg
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+filter_h_fn put
+filter_h_fn avg
+%endif
+
+%macro filter_v4_fn 1-2 12
+%if ARCH_X86_64
+cglobal vp9_%1_8tap_1d_v_4_10, 6, 8, %2, dst, dstride, src, sstride, h, filtery, src4, sstride3
+%else
+cglobal vp9_%1_8tap_1d_v_4_10, 4, 7, %2, dst, dstride, src, sstride, filtery, src4, sstride3
+ mov filteryq, r5mp
+%define hd r4mp
+%endif
+ mova m5, [pw_1023]
+.body:
+%if notcpuflag(sse4) && ARCH_X86_64
+ pxor m11, m11
+%endif
+ mova m6, [pd_64]
+ lea sstride3q, [sstrideq*3]
+ lea src4q, [srcq+sstrideq]
+ sub srcq, sstride3q
+ mova m7, [filteryq+ 0]
+%if ARCH_X86_64 && mmsize > 8
+ mova m8, [filteryq+ 32]
+ mova m9, [filteryq+ 64]
+ mova m10, [filteryq+ 96]
+%endif
+.loop:
+ ; FIXME maybe reuse loads from previous rows, or just
+ ; more generally unroll this to prevent multiple loads of
+ ; the same data?
+ movh m0, [srcq]
+ movh m1, [srcq+sstrideq]
+ movh m2, [srcq+sstrideq*2]
+ movh m3, [srcq+sstride3q]
+ add srcq, sstrideq
+ movh m4, [src4q]
+ punpcklwd m0, m1
+ punpcklwd m2, m3
+ pmaddwd m0, m7
+%if ARCH_X86_64 && mmsize > 8
+ pmaddwd m2, m8
+%else
+ pmaddwd m2, [filteryq+ 32]
+%endif
+ movh m1, [src4q+sstrideq]
+ movh m3, [src4q+sstrideq*2]
+ paddd m0, m2
+ movh m2, [src4q+sstride3q]
+ add src4q, sstrideq
+ punpcklwd m4, m1
+ punpcklwd m3, m2
+%if ARCH_X86_64 && mmsize > 8
+ pmaddwd m4, m9
+ pmaddwd m3, m10
+%else
+ pmaddwd m4, [filteryq+ 64]
+ pmaddwd m3, [filteryq+ 96]
+%endif
+ paddd m0, m4
+ paddd m0, m3
+ paddd m0, m6
+ psrad m0, 7
+%if cpuflag(sse4)
+ packusdw m0, m0
+%else
+ packssdw m0, m0
+%endif
+%ifidn %1, avg
+ movh m1, [dstq]
+%endif
+ pminsw m0, m5
+%if notcpuflag(sse4)
+%if ARCH_X86_64
+ pmaxsw m0, m11
+%else
+ pxor m2, m2
+ pmaxsw m0, m2
+%endif
+%endif
+%ifidn %1, avg
+ pavgw m0, m1
+%endif
+ movh [dstq], m0
+ add dstq, dstrideq
+ dec hd
+ jg .loop
+ RET
+
+%if ARCH_X86_64
+cglobal vp9_%1_8tap_1d_v_4_12, 6, 8, %2, dst, dstride, src, sstride, h, filtery, src4, sstride3
+%else
+cglobal vp9_%1_8tap_1d_v_4_12, 4, 7, %2, dst, dstride, src, sstride, filtery, src4, sstride3
+ mov filteryq, r5mp
+%endif
+ mova m5, [pw_4095]
+ jmp mangle(private_prefix %+ _ %+ vp9_%1_8tap_1d_v_4_10 %+ SUFFIX).body
+%endmacro
+
+INIT_XMM sse2
+filter_v4_fn put
+filter_v4_fn avg
+
+%macro filter_v_fn 1-2 13
+%assign %%px mmsize/2
+%if ARCH_X86_64
+cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _10, 6, 8, %2, dst, dstride, src, sstride, h, filtery, src4, sstride3
+%else
+cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _10, 4, 7, %2, dst, dstride, src, sstride, filtery, src4, sstride3
+ mov filteryq, r5mp
+%define hd r4mp
+%endif
+ mova m5, [pw_1023]
+.body:
+%if notcpuflag(sse4) && ARCH_X86_64
+ pxor m12, m12
+%endif
+%if ARCH_X86_64
+ mova m11, [pd_64]
+%endif
+ lea sstride3q, [sstrideq*3]
+ lea src4q, [srcq+sstrideq]
+ sub srcq, sstride3q
+ mova m7, [filteryq+ 0]
+%if ARCH_X86_64 && mmsize > 8
+ mova m8, [filteryq+ 32]
+ mova m9, [filteryq+ 64]
+ mova m10, [filteryq+ 96]
+%endif
+.loop:
+ ; FIXME maybe reuse loads from previous rows, or just
+ ; more generally unroll this to prevent multiple loads of
+ ; the same data?
+ movu m0, [srcq]
+ movu m1, [srcq+sstrideq]
+ movu m2, [srcq+sstrideq*2]
+ movu m3, [srcq+sstride3q]
+ add srcq, sstrideq
+ movu m4, [src4q]
+ SBUTTERFLY wd, 0, 1, 6
+ SBUTTERFLY wd, 2, 3, 6
+ pmaddwd m0, m7
+ pmaddwd m1, m7
+%if ARCH_X86_64 && mmsize > 8
+ pmaddwd m2, m8
+ pmaddwd m3, m8
+%else
+ pmaddwd m2, [filteryq+ 32]
+ pmaddwd m3, [filteryq+ 32]
+%endif
+ paddd m0, m2
+ paddd m1, m3
+ movu m2, [src4q+sstrideq]
+ movu m3, [src4q+sstrideq*2]
+ SBUTTERFLY wd, 4, 2, 6
+%if ARCH_X86_64 && mmsize > 8
+ pmaddwd m4, m9
+ pmaddwd m2, m9
+%else
+ pmaddwd m4, [filteryq+ 64]
+ pmaddwd m2, [filteryq+ 64]
+%endif
+ paddd m0, m4
+ paddd m1, m2
+ movu m4, [src4q+sstride3q]
+ add src4q, sstrideq
+ SBUTTERFLY wd, 3, 4, 6
+%if ARCH_X86_64 && mmsize > 8
+ pmaddwd m3, m10
+ pmaddwd m4, m10
+%else
+ pmaddwd m3, [filteryq+ 96]
+ pmaddwd m4, [filteryq+ 96]
+%endif
+ paddd m0, m3
+ paddd m1, m4
+%if ARCH_X86_64
+ paddd m0, m11
+ paddd m1, m11
+%else
+ paddd m0, [pd_64]
+ paddd m1, [pd_64]
+%endif
+ psrad m0, 7
+ psrad m1, 7
+%if cpuflag(sse4)
+ packusdw m0, m1
+%else
+ packssdw m0, m1
+%endif
+ pminsw m0, m5
+%if notcpuflag(sse4)
+%if ARCH_X86_64
+ pmaxsw m0, m12
+%else
+ pxor m2, m2
+ pmaxsw m0, m2
+%endif
+%endif
+%ifidn %1, avg
+ pavgw m0, [dstq]
+%endif
+ mova [dstq], m0
+ add dstq, dstrideq
+ dec hd
+ jg .loop
+ RET
+
+%if ARCH_X86_64
+cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _12, 6, 8, %2, dst, dstride, src, sstride, h, filtery, src4, sstride3
+%else
+cglobal vp9_%1_8tap_1d_v_ %+ %%px %+ _12, 4, 7, %2, dst, dstride, src, sstride, filtery, src4, sstride3
+ mov filteryq, r5mp
+%endif
+ mova m5, [pw_4095]
+ jmp mangle(private_prefix %+ _ %+ vp9_%1_8tap_1d_v_ %+ %%px %+ _10 %+ SUFFIX).body
+%endmacro
+
+INIT_XMM sse2
+filter_v_fn put
+filter_v_fn avg
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+filter_v_fn put
+filter_v_fn avg
+%endif
diff --git a/media/ffvpx/libavcodec/xiph.c b/media/ffvpx/libavcodec/xiph.c
new file mode 100644
index 000000000..d072224b4
--- /dev/null
+++ b/media/ffvpx/libavcodec/xiph.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2007 The FFmpeg Project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/intreadwrite.h"
+#include "xiph.h"
+
+int avpriv_split_xiph_headers(const uint8_t *extradata, int extradata_size,
+ int first_header_size, const uint8_t *header_start[3],
+ int header_len[3])
+{
+ int i;
+
+ if (extradata_size >= 6 && AV_RB16(extradata) == first_header_size) {
+ int overall_len = 6;
+ for (i=0; i<3; i++) {
+ header_len[i] = AV_RB16(extradata);
+ extradata += 2;
+ header_start[i] = extradata;
+ extradata += header_len[i];
+ if (overall_len > extradata_size - header_len[i])
+ return -1;
+ overall_len += header_len[i];
+ }
+ } else if (extradata_size >= 3 && extradata_size < INT_MAX - 0x1ff && extradata[0] == 2) {
+ int overall_len = 3;
+ extradata++;
+ for (i=0; i<2; i++, extradata++) {
+ header_len[i] = 0;
+ for (; overall_len < extradata_size && *extradata==0xff; extradata++) {
+ header_len[i] += 0xff;
+ overall_len += 0xff + 1;
+ }
+ header_len[i] += *extradata;
+ overall_len += *extradata;
+ if (overall_len > extradata_size)
+ return -1;
+ }
+ header_len[2] = extradata_size - overall_len;
+ header_start[0] = extradata;
+ header_start[1] = header_start[0] + header_len[0];
+ header_start[2] = header_start[1] + header_len[1];
+ } else {
+ return -1;
+ }
+ return 0;
+}
diff --git a/media/ffvpx/libavcodec/xiph.h b/media/ffvpx/libavcodec/xiph.h
new file mode 100644
index 000000000..1741a51b6
--- /dev/null
+++ b/media/ffvpx/libavcodec/xiph.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2007 The FFmpeg Project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_XIPH_H
+#define AVCODEC_XIPH_H
+
+#include "libavutil/common.h"
+
+/**
+ * Split a single extradata buffer into the three headers that most
+ * Xiph codecs use. (e.g. Theora and Vorbis)
+ * Works both with Matroska's packing and lavc's packing.
+ *
+ * @param[in] extradata The single chunk that combines all three headers
+ * @param[in] extradata_size The size of the extradata buffer
+ * @param[in] first_header_size The size of the first header, used to
+ * differentiate between the Matroska packing and lavc packing.
+ * @param[out] header_start Pointers to the start of the three separate headers.
+ * @param[out] header_len The sizes of each of the three headers.
+ * @return On error a negative value is returned, on success zero.
+ */
+int avpriv_split_xiph_headers(const uint8_t *extradata, int extradata_size,
+ int first_header_size, const uint8_t *header_start[3],
+ int header_len[3]);
+
+#endif /* AVCODEC_XIPH_H */